In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
import seaborn as sns

### Global functions

In [13]:
def load_results(path, folds, f_ids, n_test, model_name): # n_test = number of test stations
    """Load predictions and ground truth for one model across all folds/files.

    For every (fold, file) pair two ``.npy`` files are read:

    * predictions:  ``path + 'results/' + model_name + '/fold_' + fold + '/' + file + '.npy'``
    * ground truth: ``path + 'data/fold_' + fold + '/test/y/' + file + '.npy'``

    Each array is flattened and written into row ``[fold_id, file_id, :]`` of
    the corresponding output array.

    Parameters
    ----------
    path : str
        Root prefix. It is joined by plain string concatenation, so it must
        already end with a path separator.
    folds : sequence of str
        Fold labels used in the ``fold_<label>`` directory names.
    f_ids : sequence of str
        File stems (without the ``.npy`` extension).
    n_test : int
        Length of the last axis of the outputs (number of test stations).
    model_name : str
        Sub-directory of ``results/`` holding this model's predictions.

    Returns
    -------
    (pred_y, test_y) : tuple of np.ndarray
        Both of shape ``(len(folds), len(f_ids), n_test)``. A pair that cannot
        be loaded (missing/unreadable file or incompatible length) leaves the
        row at zero in *both* arrays, and a single ``RuntimeWarning``
        summarising all skipped pairs is issued.
    """
    import warnings  # local import so the function does not rely on another cell

    shape = (len(folds), len(f_ids), n_test)
    pred_y = np.zeros(shape)
    test_y = np.zeros(shape)
    skipped = []
    for fold_id, fold in enumerate(folds):
        for file_id, file in enumerate(f_ids):
            pred_file = path+'results/'+model_name+'/fold_'+fold+'/'+file+'.npy'
            test_file = path+'data/fold_'+fold+'/test/y/'+file+'.npy'
            # Stage both rows first so a failure on the second file cannot
            # leave a real prediction paired with an all-zero ground truth.
            pred_row = np.empty(n_test)
            test_row = np.empty(n_test)
            try:
                pred_row[:] = np.load(pred_file).ravel()
                test_row[:] = np.load(test_file).ravel()
            except (OSError, ValueError, EOFError) as err:
                # Missing/unreadable file or wrong length: keep the best-effort
                # behaviour (zeros) but remember it so it can be reported.
                skipped.append('fold {} / file {} ({})'.format(fold, file, type(err).__name__))
                continue
            pred_y[fold_id, file_id, :] = pred_row
            test_y[fold_id, file_id, :] = test_row
    if skipped:
        warnings.warn(
            'load_results({!r}): {} of {} (fold, file) pairs could not be loaded '
            'and were left as zeros; first: {}'.format(
                model_name, len(skipped), len(folds) * len(f_ids), '; '.join(skipped[:3])),
            RuntimeWarning,
            stacklevel=2,
        )
    return pred_y, test_y

def fold_wise_rmse(pred_y, test_y, n_folds): # 2D RMSE per fold
    """Return the root-mean-squared error of each fold.

    For fold ``i`` every value in ``test_y[i]`` and ``pred_y[i]`` is flattened
    into one vector and a single RMSE is computed over all of them.

    The RMSE is computed directly with NumPy rather than through
    ``mean_squared_error(..., squared=False)``: that keyword was deprecated in
    scikit-learn 1.4 and removed in 1.6, so the old call fails on current
    installations.

    Parameters
    ----------
    pred_y, test_y : array-like
        Indexed by fold along the first axis; ``pred_y[i]`` and ``test_y[i]``
        must hold the same number of elements.
    n_folds : int
        Number of leading folds to evaluate.

    Returns
    -------
    list of float
        One RMSE per fold, in fold order.
    """
    rmse = []
    for i in range(n_folds):
        truth = np.asarray(test_y[i], dtype=float).ravel()
        pred = np.asarray(pred_y[i], dtype=float).ravel()
        rmse.append(np.sqrt(np.mean((truth - pred) ** 2)))
    return rmse
        
def plot_pred_vs_test(pred_y, test_y, n_folds):
    """Scatter predictions against ground truth, one panel per fold.

    Creates a single row of ``n_folds`` subplots with shared x and y axes.
    Each panel shows the flattened ``test_y[i]`` on the x-axis against the
    flattened ``pred_y[i]`` on the y-axis, plus a reference line obtained by
    plotting the ground truth against itself (the y = x diagonal).

    Parameters
    ----------
    pred_y, test_y : np.ndarray
        3-D arrays indexed by fold along the first axis.
    n_folds : int
        Number of panels to draw.

    Returns
    -------
    None
        The figure is created through pyplot and left as the current figure.
    """
    # squeeze=False keeps the result a 2-D grid even for n_folds == 1, where
    # plt.subplots would otherwise hand back a bare Axes that cannot be indexed.
    fig, grid = plt.subplots(1, n_folds, sharex=True, sharey=True, figsize=(15,3), squeeze=False)
    ax = grid[0]
    for i in range(n_folds):
        truth = test_y[i,:,:].ravel()
        ax[i].plot(truth, truth)
        ax[i].scatter(truth, pred_y[i,:,:].ravel())
    # Axes are shared, so only the first panel is labelled.
    ax[0].set_xlabel('Ground truth')
    ax[0].set_ylabel('Predicted values')

# Global config

In [14]:
# File stems, zero-padded to five characters: '00000' ... '00276'.
f_ids = ['{:05d}'.format(idx) for idx in range(277)]
# Fold labels as strings: '0' ... '5'.
folds = list(map(str, range(6)))
# Number of test stations (last axis of the arrays built by load_results).
n_test = 6

## Beijing PM2.5 - quadratic-interpolated met data

In [15]:
# Root directory for this experiment. Keep the trailing '/': file names are
# built from it by plain string concatenation (path + 'results/...').
path = "../production/pm25_beijing_best36/quadratic/"

### All models

In [17]:
# Build a (model x fold) RMSE table with an extra per-model average column.
models = ['svr', 'gp_rbf', 'gp_m32', 'gp_m12', 'gp_linear','nsgp_rbf','adain']
# Name the columns after the fold labels themselves so they always match the
# 'fold_'+fold keys written in the ADAIN loop below.
fold_cols = ['fold_'+fold for fold in folds]
res = pd.DataFrame(index=models, columns=fold_cols+['avg'])

# Baseline models store one .npy per file id and are read via load_results.
# ADAIN is excluded by name rather than by its position in the list.
for model in [m for m in models if m != 'adain']:
    pred_y, test_y = load_results(path, folds, f_ids, n_test, model)
    fold_rmse = fold_wise_rmse(pred_y, test_y, len(folds))
    res.loc[model, :] = fold_rmse + [np.mean(fold_rmse)]

# ADAIN: results are stored as one pred_y.npy / test_y.npy pair per fold.
for fold in folds:
    pred_y = np.load(path+'results/adain/fold_'+fold+'/pred_y.npy')
    test_y = np.load(path+'results/adain/fold_'+fold+'/test_y.npy')
    # np.sqrt(MSE) instead of squared=False: that flag is deprecated in
    # scikit-learn 1.4 and removed in 1.6.
    res.loc['adain', 'fold_'+fold] = np.sqrt(mean_squared_error(test_y.ravel(), pred_y.ravel()))
# Average over the fold columns only, so the result never depends on the
# current content of the 'avg' cell.
res.loc['adain','avg'] = res.loc['adain', fold_cols].mean()

# The frame was created empty (object dtype); every cell is numeric by now.
res = res.astype(float)
res

Unnamed: 0,fold_0,fold_1,fold_2,fold_3,fold_4,fold_5,avg
svr,28.638901,33.535247,34.186379,27.510037,30.285619,49.26495,33.903522
gp_rbf,28.499524,29.614249,30.701977,26.062649,31.465777,43.712286,31.676077
gp_m32,28.654237,29.752761,30.800615,25.772486,31.427642,44.024533,31.738712
gp_m12,29.324178,30.41102,31.494377,25.90989,31.743754,44.506802,32.23167
gp_linear,29.166274,207.538515,37.053253,30.092086,33.785311,44.611238,63.70778
nsgp_rbf,31.902326,32.697181,33.063074,27.379413,34.409011,44.507379,33.993064
adain,38.240125,55.293895,40.554956,39.2224,43.931829,50.753019,44.666037
