# Evaluate CWB QPE Pre-QC

In the previous example we have developed tools for deriving time-by-station QPE from CWB pre-QC data. Now we are going to evalute the results against station records.

## Read QPE data

In [1]:
stid = ['466880','466910','466920','466930','466940']

import pandas as pd
import numpy as np

qpe = pd.read_csv('../ws.cwb/cwbqpe_eval.csv')
qpe = qpe.loc[:,['timestamp']+stid]

qpe['date'] = qpe['timestamp'].apply(lambda x: int(x/100))
qpe.head()

Unnamed: 0,timestamp,466880,466910,466920,466930,466940,date
0,201401010800,0.0,0.0,0.0,0.0,0.0,2014010108
1,201401010900,0.0,0.0,0.0,0.0,0.0,2014010109
2,201401011000,0.0,0.0,0.0,0.0,0.0,2014010110
3,201401011100,0.0,0.0,0.0,0.0,0.0,2014010111
4,201401011200,0.0,0.0,0.0,0.0,0.0,2014010112


## Read Station Data and Merge

In [2]:
print('Station Precipitation:')
y = pd.read_csv('data/t1hr.csv')
print(y.head())
print(y.shape)

print('')
print('Merged:')
ys_466880 = qpe.loc[:,['date','466880']].merge(y.loc[:,['date','466880']], suffixes=('_qpe','_obs'), on='date')
print(ys_466880.head())
print(ys_466880.shape)

Station Precipitation:
         date  C0A580  C0A970  466940  C0A540  C0A550  C0A9A0  C0AC60  C0A870  \
0  2013010101     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
1  2013010102     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2  2013010103     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3  2013010104     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4  2013010105     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

   466920  ...  C0A9I1  C0AD50  C0A9B0  C0A560  C0A950  C0A940  C0A570  \
0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
1     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

   C0A980  C0A9C0  C0AD40  
0     0.0     0.0

In [3]:
# Do it for all stations
ys = {}
y = pd.read_csv('data/t1hr.csv')
for id in stid:
    ys[id] = qpe.loc[:,['date',id]].merge(y.loc[:,['date',id]], suffixes=('_qpe','_obs'), on='date')
    
for k,v in ys.items():
    print(k)
    print(v.head())
    print(v.shape)

466880
         date  466880_qpe  466880_obs
0  2014010108         0.0         0.0
1  2014010109         0.0         0.0
2  2014010110         0.0         0.0
3  2014010111         0.0         0.0
4  2014010112         0.0         0.0
(25136, 3)
466910
         date  466910_qpe  466910_obs
0  2014010108         0.0         0.0
1  2014010109         0.0         0.0
2  2014010110         0.0         0.0
3  2014010111         0.0         0.0
4  2014010112         0.0         0.0
(25136, 3)
466920
         date  466920_qpe  466920_obs
0  2014010108         0.0         0.0
1  2014010109         0.0         0.0
2  2014010110         0.0         0.0
3  2014010111         0.0         0.0
4  2014010112         0.0         0.0
(25136, 3)
466930
         date  466930_qpe  466930_obs
0  2014010108         0.0         0.0
1  2014010109         0.0         0.0
2  2014010110         0.0         0.0
3  2014010111         0.0         0.0
4  2014010112         0.0         0.0
(25136, 3)
466940
         

Now we have all data paired, we will proceed will performance metrics.

## Performance Metrics

For continuous output (the amount of precipitation), we calculate the `root-mean-squared-error (RMSE)` and `correlation coeeficient`. For categorical output, we use 30mm/hr as the threshold, and derive `confusion matrix` and other metrics (see [wikipedia](https://en.wikipedia.org/wiki/Confusion_matrix) for further details).

In [4]:
def evaluate_binary(yt, yp, stid=None, ythresh=30.):
    from sklearn.metrics import confusion_matrix
    ytb = (yt>=ythresh)*1
    ypb = (yp>=ythresh)*1
    # Derive metrics
    output = {'id':stid}
    TN, FP, FN, TP = confusion_matrix(ytb, ypb).ravel()
    output['true_positive'] = np.round(TP,2)
    output['false_positive'] = np.round(FP,2)
    output['false_negative'] = np.round(FN,2)
    output['true_negative'] = np.round(TN,2)
    output['sensitivity'] = np.round(TP/(TP+FN),2)
    output['specificity'] = np.round(TN/(FP+TN),2)
    output['prevalence'] = np.round((TP+FN)/(FN+TP+FP+TN),8)
    output['ppv'] = np.round(TP/(TP+FP),4)
    output['npv'] = np.round(TN/(TN+FN),4)
    output['fpr'] = np.round(FP/(FP+TN),4)
    output['fnr'] = np.round(FN/(FN+TP),4)
    output['fdr'] = np.round(FP/(FP+TP),4)
    output['FOR'] = np.round(FN/(TN+FN),4)
    output['accuracy'] = np.round((TP+TN)/(FN+TP+FP+TN),4)
    output['F1'] = np.round(2*TP/(2*TP+FP+FN),4)
    output['MCC'] = np.round((TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)),4)
    output['informedness'] = np.round(output['sensitivity'] + output['specificity'] - 1,4)
    output['markedness'] = np.round(output['ppv'] + output['npv'] -1,4)
    return(output)
    
evaluate_binary(ys['466880']['466880_obs'], ys['466880']['466880_qpe'])

{'id': None,
 'true_positive': 8,
 'false_positive': 17,
 'false_negative': 15,
 'true_negative': 25096,
 'sensitivity': 0.35,
 'specificity': 1.0,
 'prevalence': 0.00091502,
 'ppv': 0.32,
 'npv': 0.9994,
 'fpr': 0.0007,
 'fnr': 0.6522,
 'fdr': 0.68,
 'FOR': 0.0006,
 'accuracy': 0.9987,
 'F1': 0.3333,
 'MCC': 0.333,
 'informedness': 0.35,
 'markedness': 0.3194}

In [5]:
results = []
for i in stid:
    ytlab = i+'_obs'
    yplab = i+'_qpe'
    tmp = evaluate_binary(ys[i][ytlab], ys[i][yplab], stid=i)
    results.append(tmp)

results = pd.DataFrame(results)
results.to_csv('data/eval_cwbqpe.csv', index=False)
print(results)

       id  true_positive  false_positive  false_negative  true_negative  \
0  466880              8              17              15          25096   
1  466910              3               7              23          25103   
2  466920              7              21              15          25093   
3  466930              9               4              25          25098   
4  466940              3               5              16          25112   

   sensitivity  specificity  prevalence     ppv     npv     fpr     fnr  \
0         0.35          1.0    0.000915  0.3200  0.9994  0.0007  0.6522   
1         0.12          1.0    0.001034  0.3000  0.9991  0.0003  0.8846   
2         0.32          1.0    0.000875  0.2500  0.9994  0.0008  0.6818   
3         0.26          1.0    0.001353  0.6923  0.9990  0.0002  0.7353   
4         0.16          1.0    0.000756  0.3750  0.9994  0.0002  0.8421   

      fdr     FOR  accuracy      F1     MCC  informedness  markedness  
0  0.6800  0.0006    0.998

## Evaluate the ML approach

We use the same evaluation metrics on our ML approach.

In [6]:
def evaluate_mlc(yt, yp, stid=None, ytth=3, ypth=1):
    from sklearn.metrics import confusion_matrix
    ytb = (yt>ytth)*1
    ypb = (yp>ypth)*1
    # Derive metrics
    output = {'id':stid}
    TN, FP, FN, TP = confusion_matrix(ytb, ypb).ravel()
    output['true_positive'] = np.round(TP,2)
    output['false_positive'] = np.round(FP,2)
    output['false_negative'] = np.round(FN,2)
    output['true_negative'] = np.round(TN,2)
    output['sensitivity'] = np.round(TP/(TP+FN),2)
    output['specificity'] = np.round(TN/(FP+TN),2)
    output['prevalence'] = np.round((TP+FN)/(FN+TP+FP+TN),8)
    output['ppv'] = np.round(TP/(TP+FP),4)
    output['npv'] = np.round(TN/(TN+FN),4)
    output['fpr'] = np.round(FP/(FP+TN),4)
    output['fnr'] = np.round(FN/(FN+TP),4)
    output['fdr'] = np.round(FP/(FP+TP),4)
    output['FOR'] = np.round(FN/(TN+FN),4)
    output['accuracy'] = np.round((TP+TN)/(FN+TP+FP+TN),4)
    output['F1'] = np.round(2*TP/(2*TP+FP+FN),4)
    output['MCC'] = np.round((TP*TN-FP*FN)/np.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)),4)
    output['informedness'] = np.round(output['sensitivity'] + output['specificity'] - 1,4)
    output['markedness'] = np.round(output['ppv'] + output['npv'] -1,4)
    return(output)


MLDIR = '../ws.cwb/evaluate_cv/'
MLEXT = '_e20cv_mlc_ys.csv'

tmp = pd.read_csv(MLDIR+stid[0]+MLEXT)
print(tmp.head())

evaluate_mlc(yt=tmp['y'], yp=tmp['y_pred'], stid=stid[0])

         date  y  y_pred   y0        y1            y2            y3   y4
0  2013010112  1       1  1.0  0.989957  7.748604e-07  2.980232e-08  0.0
1  2013010113  1       1  1.0  0.999817  5.960464e-08  0.000000e+00  0.0
2  2013010114  1       1  1.0  0.998764  2.235174e-06  2.980232e-08  0.0
3  2013010115  1       0  1.0  0.067918  2.980232e-07  0.000000e+00  0.0
4  2013010116  1       1  1.0  0.976671  0.000000e+00  0.000000e+00  0.0


{'id': '466880',
 'true_positive': 16,
 'false_positive': 107,
 'false_negative': 11,
 'true_negative': 30187,
 'sensitivity': 0.59,
 'specificity': 1.0,
 'prevalence': 0.00089047,
 'ppv': 0.1301,
 'npv': 0.9996,
 'fpr': 0.0035,
 'fnr': 0.4074,
 'fdr': 0.8699,
 'FOR': 0.0004,
 'accuracy': 0.9961,
 'F1': 0.2133,
 'MCC': 0.2764,
 'informedness': 0.59,
 'markedness': 0.1297}

In [7]:
# Loop through stid
results_ml = []
for i in stid:
    ysml = pd.read_csv(MLDIR + i + MLEXT)
    tmp = evaluate_mlc(ysml['y'], ysml['y_pred'], stid=i)
    results_ml.append(tmp)

results_ml = pd.DataFrame(results_ml)
results_ml.to_csv('data/eval_ml.csv', index=False)
print(results_ml)

       id  true_positive  false_positive  false_negative  true_negative  \
0  466880             16             107              11          30187   
1  466910             29             257              11          29766   
2  466920             13              74              18          30063   
3  466930             34             207               8          30806   
4  466940              9             100              16          30440   

   sensitivity  specificity  prevalence     ppv     npv     fpr     fnr  \
0         0.59         1.00    0.000890  0.1301  0.9996  0.0035  0.4074   
1         0.72         0.99    0.001331  0.1014  0.9996  0.0086  0.2750   
2         0.42         1.00    0.001028  0.1494  0.9994  0.0025  0.5806   
3         0.81         0.99    0.001352  0.1411  0.9997  0.0067  0.1905   
4         0.36         1.00    0.000818  0.0826  0.9995  0.0033  0.6400   

      fdr     FOR  accuracy      F1     MCC  informedness  markedness  
0  0.8699  0.0004    0.996