In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
import seaborn as sns

### Global functions

In [50]:
def load_results(path, folds, f_ids, n_test, model_name): # n_test = number of test stations
    """Load predictions and ground truth for every (fold, file) pair of one model.

    Predictions are read from ``<path>results/<model_name>/fold_<fold>/<file>.npy``
    and targets from ``<path>data/fold_<fold>/test/y/<file>.npy``. Each array is
    flattened before being stored.

    Parameters
    ----------
    path : str
        Root directory. It is concatenated as-is, so it must end with a path
        separator.
    folds : sequence of str
        Fold labels used in the ``fold_<fold>`` directory names.
    f_ids : sequence of str
        File stems (without the ``.npy`` extension).
    n_test : int
        Number of values stored per (fold, file) entry.
    model_name : str
        Sub-directory of ``results/`` holding this model's predictions.

    Returns
    -------
    pred_y, test_y : numpy.ndarray
        Two float arrays of shape ``(len(folds), len(f_ids), n_test)``.
        An entry is filled only when BOTH of its files load and fit ``n_test``;
        otherwise it stays zero in both arrays, so a prediction is never
        compared against a placeholder target.

    Warns
    -----
    UserWarning
        Once per call, if any entry had to be skipped.
    """
    import warnings  # function-scope import: keeps this cell independent of the import cell

    shape = (len(folds), len(f_ids), n_test)
    pred_y = np.zeros(shape)
    test_y = np.zeros(shape)
    skipped = []
    for fold_id, fold in enumerate(folds):
        for file_id, file in enumerate(f_ids):
            pred_file = path+'results/'+model_name+'/fold_'+fold+'/'+file+'.npy'
            test_file = path+'data/fold_'+fold+'/test/y/'+file+'.npy'
            try:
                # Load and size-check both files before touching the outputs, so a
                # failure on the second file cannot leave a half-written entry.
                pred_row = np.broadcast_to(np.load(pred_file).ravel(), (n_test,))
                test_row = np.broadcast_to(np.load(test_file).ravel(), (n_test,))
            except (OSError, ValueError, EOFError) as err:
                # Best effort: missing, unreadable or wrongly sized files are
                # skipped, but recorded instead of being silently ignored.
                skipped.append((fold, file, err))
                continue
            pred_y[fold_id, file_id, :] = pred_row
            test_y[fold_id, file_id, :] = test_row
    if skipped:
        first_fold, first_file, first_err = skipped[0]
        warnings.warn(
            "load_results: {} of {} entries for model {!r} could not be loaded and "
            "were left as zeros (first: fold {}, file {}: {!r})".format(
                len(skipped), len(folds) * len(f_ids), model_name,
                first_fold, first_file, first_err),
            stacklevel=2,
        )
    return pred_y, test_y

def fold_wise_rmse(pred_y, test_y, n_folds): # 2D RMSE per fold
    """Return one root-mean-squared error per fold.

    For each of the first ``n_folds`` entries along axis 0, all remaining axes
    are flattened and pooled into a single RMSE.

    The RMSE is computed with NumPy rather than
    ``mean_squared_error(..., squared=False)``: that keyword was deprecated in
    scikit-learn 1.4 and removed in 1.6, so the old call fails on current
    installs.

    Parameters
    ----------
    pred_y, test_y : array-like
        Predictions and targets, indexed by fold along axis 0.
    n_folds : int
        Number of leading folds to evaluate.

    Returns
    -------
    list of float
        ``rmse[i]`` is the RMSE of fold ``i``.

    Raises
    ------
    ValueError
        If a fold is empty, the two arrays differ in size, or either contains
        NaN/inf (the same conditions the scikit-learn call rejected).
    """
    rmse = []
    for i in range(n_folds):
        truth = np.asarray(test_y[i], dtype=float).ravel()
        pred = np.asarray(pred_y[i], dtype=float).ravel()
        if truth.size == 0 or truth.shape != pred.shape:
            raise ValueError(
                "fold {}: need equally sized, non-empty arrays, got {} targets "
                "and {} predictions".format(i, truth.size, pred.size))
        if not (np.isfinite(truth).all() and np.isfinite(pred).all()):
            raise ValueError("fold {}: input contains NaN or infinity".format(i))
        rmse.append(float(np.sqrt(np.mean(np.square(truth - pred)))))
    return rmse
        
def plot_pred_vs_test(pred_y, test_y, n_folds):
    """Scatter predictions against ground truth, one panel per fold.

    Each panel shows the flattened targets of one fold on the x-axis and the
    matching flattened predictions on the y-axis, plus the identity line
    (targets plotted against themselves) as a visual reference. All panels
    share both axes; only the first panel carries axis labels.

    Parameters
    ----------
    pred_y, test_y : numpy.ndarray
        3-D arrays indexed by fold along axis 0.
    n_folds : int
        Number of panels to draw (folds ``0 .. n_folds-1``).
    """
    # squeeze=False keeps `axes` 2-D even when n_folds == 1; without it
    # plt.subplots returns a bare Axes object and indexing it fails.
    fig, axes = plt.subplots(1, n_folds, sharex=True, sharey=True, figsize=(15,3), squeeze=False)
    for i, panel in enumerate(axes[0]):
        truth = test_y[i,:,:].ravel()
        panel.plot(truth, truth)  # identity line: where a perfect prediction would fall
        panel.scatter(truth, pred_y[i,:,:].ravel())
    axes[0, 0].set_xlabel('Ground truth')
    axes[0, 0].set_ylabel('Predicted values')

### Global config

In [51]:
# File stems of the test samples: zero-padded five-digit strings '00000' .. '00276'.
f_ids = ['{:05d}'.format(idx) for idx in range(277)]
# Fold labels kept as strings ('0' .. '5') because they are spliced into file paths.
folds = list(map(str, range(6)))
# Number of test stations, i.e. values stored per result file.
n_test = 6

## beijing-pm2.5-quadratic interpolated met data

In [52]:
# Root directory of this experiment. load_results() and the ADAIN loader append
# 'results/...' and 'data/...' to it by plain string concatenation, so the
# trailing '/' is required.
path = '../production/pm25_beijing_best36/quadratic/'

### All models

In [53]:
models = ['svr', 'gp_rbf', 'gp_m32', 'gp_m12', 'gp_linear','nsgp_rbf','adain']
# One RMSE column per fold plus the across-fold mean. Column names are derived
# from the fold labels themselves so they always match the 'fold_'+fold keys
# written in the ADAIN loop below. dtype=float keeps the table numeric.
fold_cols = ['fold_'+fold for fold in folds]
res = pd.DataFrame(index=models, columns=fold_cols+['avg'], dtype=float)

# Every model except ADAIN stores one .npy file per test id and is read through
# load_results(). ADAIN is excluded by name rather than by its list position.
for model in [m for m in models if m != 'adain']:
    pred_y, test_y = load_results(path, folds, f_ids, n_test, model)
    fold_rmse = fold_wise_rmse(pred_y, test_y, len(folds))
    res.loc[model, fold_cols] = fold_rmse
    res.loc[model, 'avg'] = np.mean(fold_rmse)

# ADAIN stores a single pred_y.npy / test_y.npy pair per fold.
for fold in folds:
    pred_y = np.load(path+'results/adain/fold_'+fold+'/pred_y.npy')
    test_y = np.load(path+'results/adain/fold_'+fold+'/test_y.npy')
    # np.sqrt(MSE) instead of squared=False: that keyword was removed in scikit-learn 1.6.
    res.loc['adain', 'fold_'+fold] = np.sqrt(mean_squared_error(test_y.ravel(), pred_y.ravel()))
# Average over the fold columns only, so the result does not depend on whatever
# value the 'avg' cell currently holds.
res.loc['adain','avg'] = res.loc['adain', fold_cols].mean()


res

Unnamed: 0,fold_0,fold_1,fold_2,fold_3,fold_4,fold_5,avg
svr,28.6389,33.5352,34.1864,27.51,30.2856,49.265,33.9035
gp_rbf,28.4995,29.6142,30.702,26.0626,31.4658,43.7123,31.6761
gp_m32,28.6542,29.7528,30.8006,25.7725,31.4276,44.0245,31.7387
gp_m12,29.3242,30.411,31.4944,25.9099,31.7438,44.5068,32.2317
gp_linear,29.1663,207.539,37.0533,30.0921,33.7853,44.6112,63.7078
nsgp_rbf,31.9023,32.6972,33.0631,27.3794,34.409,44.5074,33.9931
adain,33.6661,45.0136,39.8352,32.4324,43.9318,50.753,40.9387
