In [11]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.linear_model import LinearRegression
import scipy
import sklearn
import pandas as pd

def plot_regression(O, P):
    """Scatter predicted against observed values with a fitted line and a 1:1 line.

    A new 6x6 inch figure is created on the pyplot state machine. It shows
    the (O, P) points, the least-squares line of P regressed on O (dashed
    orange) and the 1:1 reference line (dashed red). Both axes run from 0 to
    ``max(O, P) + 1`` and are labelled as ETo in mm/d.

    Parameters
    ----------
    O : array-like
        Observed values. Flattened to 1-D; must have the same length as ``P``.
    P : array-like
        Predicted values. Flattened to 1-D.

    Returns
    -------
    None
        The figure is left as the current pyplot figure.
    """
    # Normalize inputs so lists and pandas Series work as well as ndarrays
    # (the regression below needs ``reshape``, which only ndarrays provide).
    O = np.asarray(O).ravel()
    P = np.asarray(P).ravel()

    # Squared Pearson correlation between observed and predicted values.
    r2 = scipy.stats.pearsonr(O, P)[0] ** 2

    # Shared upper limit for both axes and for the 1:1 line.
    upper = np.max([O, P]) + 1

    plt.figure(figsize=(6, 6))
    plt.grid(zorder=1)
    points = plt.scatter(O, P, edgecolor='black', linewidth=1, zorder=2)

    # Least-squares fit of P on O; scikit-learn expects a 2-D feature matrix.
    features = O.reshape(-1, 1)
    regmodel = LinearRegression()
    regmodel.fit(features, P)
    fit_line, = plt.plot(O, regmodel.predict(features),
                         color='orange', linestyle='--', zorder=3)

    plt.xlim([0, upper])
    plt.ylim([0, upper])
    identity_line, = plt.plot([0, upper], [0, upper], 'r--', zorder=4)

    # Pass the handles explicitly: with labels alone, which artist receives
    # which label depends on matplotlib's internal artist ordering, and the
    # scatter ends up labelled as the regression line on current versions.
    plt.legend(
        [fit_line, identity_line, points],
        [u'Regression  R\u00b2: {:.3f}'.format(r2), '1:1 Plot', 'Observed - Predicted'],
    )
    plt.xlabel('Observed ETo [mm/d]')
    plt.ylabel('Predicted ETo [mm/d]')

In [12]:
def plot_regression_stats(O, P):
    """Scatter P against O with a fitted line and an on-plot table of error metrics.

    A new 6x6 inch figure is created on the pyplot state machine. It shows
    the (O, P) points, the least-squares line of P regressed on O, a table of
    MAE, MSE, RMSE, MAPE, RSq (``sklearn.metrics.r2_score``) and R² (squared
    Pearson correlation) in the lower-right corner, and a legend in the
    upper-left corner.

    Parameters
    ----------
    O : array-like
        Observed values. Flattened to 1-D; must have the same length as ``P``.
        MAPE divides by ``O``, so zeros in ``O`` make that metric
        infinite or NaN.
    P : array-like
        Predicted values. Flattened to 1-D.

    Returns
    -------
    None
        The figure is left as the current pyplot figure.
    """
    # Normalize inputs so lists and pandas Series work as well as ndarrays.
    O = np.asarray(O).ravel()
    P = np.asarray(P).ravel()

    # Error metrics between observed and predicted values.
    mae = sklearn.metrics.mean_absolute_error(O, P)
    mse = sklearn.metrics.mean_squared_error(O, P)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((O - P) / O)) * 100
    RSq = sklearn.metrics.r2_score(O, P)
    corr, _ = scipy.stats.pearsonr(O, P)  # Pearson correlation between observed and predicted
    r2 = corr ** 2

    # Scatter of the raw points.
    plt.figure(figsize=(6, 6))
    points = plt.scatter(O, P, edgecolor='#808080', facecolors='#C0C0C0', linewidth=1, zorder=2)

    # Linear best fit of P on O. The line is drawn by evaluating the model at
    # the smallest and largest observed value, which is correct for either
    # slope sign (pairing min(pred) with min(obs) is wrong when the slope is
    # negative).
    regmodel = LinearRegression()
    regmodel.fit(O.reshape(-1, 1), P)
    x_ends = np.array([O.min(), O.max()])
    fit_line, = plt.plot(x_ends, regmodel.predict(x_ends.reshape(-1, 1)),
                         color='black', lw=3, zorder=3)
    plt.xlabel('VV [db]')
    plt.ylabel('SM modelled [mm]')

    # Build the metrics table: round to 3 decimals, express RSq and R² as
    # percentages, then attach a unit suffix to every value.
    summary = pd.DataFrame(
        data=np.round([mae, mse, rmse, mape, RSq, r2], 3),
        index=['MAE', 'MSE', 'RMSE', 'MAPE', 'RSq', u'R\u00b2'],
        columns=['Metrics'],
    )
    summary.loc['RSq'] = np.round(summary.loc['RSq'] * 100, 2)
    summary.loc[u'R\u00b2'] = np.round(summary.loc[u'R\u00b2'] * 100, 2)
    # NOTE(review): MSE is a squared quantity; the ' mm' suffix is kept
    # unchanged from the original output - confirm the intended unit.
    absolute = summary['Metrics'].iloc[:3].astype(str) + ' mm'
    relative = summary['Metrics'].iloc[3:].astype(str) + ' %'
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    summary = pd.concat([absolute, relative]).to_frame()

    # Adjust table size, location and alignment.
    table = plt.table(cellText=summary.values, colWidths=[0.5] * len(summary.columns),
                      rowLabels=list(summary.index), cellLoc='left', rowLoc='left',
                      loc=4, edges='open')
    table.auto_set_font_size(False)
    table.set_fontsize(9)
    table.scale(.5, 1.5)

    # Explicit handles keep each label on the right artist. No 1:1 line is
    # drawn in this figure, so no '1:1 Plot' entry is listed.
    plt.legend(
        [fit_line, points],
        [u'Pearson R\u00b2: {:.3f} &\nPearson R: {:.3f}'.format(corr**2, corr), 'Observed - Predicted'],
        loc=2,
    )