In [1]:
import glob
import json
import os
import pandas as pd
import random

import initialise
import common
from analysis_utils import calc_statistics, sort_file_list
from display_utils import display_frames
from model_utils import generate_ensembles

In [2]:
# Notebook configuration.
# Path to the Europe samples file, built from the project's datasets directory.
samples_file_europe = os.path.join(common.DATASETS_DIR, 'europe_samples_365days.csv')
# Root directory holding the trained models. Defaults to the original network-drive
# location; set the LFMC_MODELS_DIR environment variable (for example to the value of
# common.MODELS_DIR) to run on another machine without editing the notebook.
models_dir = os.environ.get('LFMC_MODELS_DIR') or r'Z:\LFMC_data\Transfer_learning\Models'
# Directory of the experiment analysed in this notebook.
model_dir = os.path.join(models_dir, 'europe_2013_transfer_200')
precision = 3       # floating point precision for saved predictions

In [3]:
# Read the experiment definition stored in the model directory and list the
# name of every test it declares (displayed as the cell output).
experiment_path = os.path.join(model_dir, 'experiment.json')
with open(experiment_path, 'r') as experiment_file:
    experiment = json.load(experiment_file)
test_names = [test_cfg['testName'] for test_cfg in experiment['tests']]
test_names

['CONUS model for 2013',
 'common normalise; no frozen layers',
 'common normalise; one frozen layer',
 'common normalise; two frozen layers',
 'common normalise; three frozen layers',
 'separate normalise; no frozen layers',
 'separate normalise; one frozen layer',
 'separate normalise; two frozen layers',
 'separate normalise; three frozen layers']

In [4]:
def _read_predictions(directory):
    """Read the ``test_predicts.csv`` file in ``directory``, using its first column as the index."""
    return pd.read_csv(os.path.join(directory, 'test_predicts.csv'), index_col=0)


# Build ``preds`` as a nested list indexed as preds[test][run][frame]. For each run
# the frames are the predictions found in its ``epoch*`` sub-directories followed by
# the predictions stored in the run directory itself.
# NOTE(review): sort_file_list presumably orders the paths by the number following
# the given prefix - confirm in analysis_utils.
preds = []
test_dirs = sort_file_list(glob.glob(os.path.join(model_dir, 'test*')), 'test')
# The first test directory is skipped; the display cells label the results with
# test_names[1:] to match.
for test_dir in test_dirs[1:]:
    test_preds = []
    run_dirs = sort_file_list(glob.glob(os.path.join(test_dir, 'run*')), 'run')
    for run_dir in run_dirs:
        epoch_dirs = sort_file_list(glob.glob(os.path.join(run_dir, 'epoch*')), 'epoch')
        run_preds = [_read_predictions(epoch_dir) for epoch_dir in epoch_dirs]
        # ``preds_`` is deliberately left bound to the last frame read: later cells
        # take the ``y`` column from it.
        preds_ = _read_predictions(run_dir)
        run_preds.append(preds_)
        test_preds.append(run_preds)
    preds.append(test_preds)

In [5]:
# ``y`` column of the last predictions frame that was loaded. This is the same frame
# the loading loop leaves in ``preds_``, referenced explicitly through ``preds``.
y = preds[-1][-1][-1].y
epoch_step = 50     # spacing of the row labels given to consecutive prediction frames
stats_dfs = []
for test_preds in preds:
    # Stack each run's ``base`` predictions into one Series indexed by
    # (label, sample). The labels are derived from the number of frames in the run
    # (0, 50, 100, ...) so they always line up with the data, instead of relying on
    # pd.concat truncating a fixed-length ``keys`` index.
    run_columns = []
    for run_preds in test_preds:
        epoch_labels = pd.RangeIndex(0, epoch_step * len(run_preds), epoch_step)
        run_columns.append(pd.concat([frame.base for frame in run_preds], keys=epoch_labels))
    # One column per run -> mean across runs -> one row per label, one column per sample.
    summ_ = pd.concat(run_columns, axis=1, ignore_index=True).mean(axis=1).unstack()
    # Statistics of the run-averaged prediction against ``y`` for every label.
    stats_ = {epochs: calc_statistics(y, yhat[y.index]) for epochs, yhat in summ_.iterrows()}
    summ_stats = pd.DataFrame.from_dict(stats_, orient='index')
    stats_dfs.append(summ_stats)
# test_names[1:] because the first test was skipped when the predictions were loaded.
display_frames(stats_dfs, test_names[1:], precision=2)

Unnamed: 0,Count,RMSE,R2,Bias
0,244,33.29,-1.06,-25.83
50,244,31.67,-0.87,-23.6
100,244,27.14,-0.37,-16.44
150,244,24.86,-0.15,-11.82
200,244,23.89,-0.06,-9.11
250,244,23.49,-0.03,-7.49
300,244,23.32,-0.01,-6.47
350,244,23.26,-0.01,-5.84
400,244,23.24,-0.01,-5.44
450,244,23.24,-0.01,-5.19

Unnamed: 0,Count,RMSE,R2,Bias
0,244,33.29,-1.06,-25.83
50,244,33.08,-1.04,-24.89
100,244,27.42,-0.4,-16.41
150,244,24.95,-0.16,-11.41
200,244,23.91,-0.06,-8.57
250,244,23.45,-0.02,-6.94
300,244,23.24,-0.01,-5.98
350,244,23.13,0.0,-5.4
400,244,23.07,0.01,-5.05
450,244,23.05,0.01,-4.85

Unnamed: 0,Count,RMSE,R2,Bias
0,244,33.29,-1.06,-25.83
50,244,34.64,-1.23,-26.94
100,244,28.18,-0.48,-17.68
150,244,25.1,-0.17,-12.07
200,244,23.72,-0.05,-8.74
250,244,23.13,0.0,-6.75
300,244,22.88,0.03,-5.57
350,244,22.77,0.03,-4.87
400,244,22.72,0.04,-4.45
450,244,22.7,0.04,-4.2

Unnamed: 0,Count,RMSE,R2,Bias
0,244,33.29,-1.06,-25.83
50,244,38.07,-1.7,-31.46
100,244,29.78,-0.65,-20.34
150,244,25.73,-0.23,-13.35
200,244,23.9,-0.06,-8.96
250,244,23.12,0.0,-6.23
300,244,22.8,0.03,-4.55
350,244,22.65,0.04,-3.53
400,244,22.59,0.05,-2.91
450,244,22.55,0.05,-2.54

Unnamed: 0,Count,RMSE,R2,Bias
0,244,25.75,-0.23,-17.21
50,244,32.07,-0.91,-25.35
100,244,26.41,-0.3,-17.47
150,244,23.54,-0.03,-12.14
200,244,22.09,0.09,-8.6
250,244,21.37,0.15,-6.32
300,244,21.02,0.18,-4.89
350,244,20.85,0.19,-4.0
400,244,20.77,0.2,-3.46
450,244,20.73,0.2,-3.13

Unnamed: 0,Count,RMSE,R2,Bias
0,244,25.75,-0.23,-17.21
50,244,33.25,-1.06,-26.38
100,244,26.08,-0.27,-17.02
150,244,22.56,0.05,-11.1
200,244,21.03,0.18,-7.46
250,244,20.4,0.22,-5.21
300,244,20.14,0.24,-3.84
350,244,20.04,0.25,-3.0
400,244,20.0,0.26,-2.5
450,244,19.99,0.26,-2.19

Unnamed: 0,Count,RMSE,R2,Bias
0,244,25.75,-0.23,-17.21
50,244,32.89,-1.01,-26.11
100,244,26.36,-0.29,-17.43
150,244,23.01,0.01,-11.66
200,244,21.49,0.14,-8.04
250,244,20.84,0.19,-5.78
300,244,20.56,0.21,-4.39
350,244,20.44,0.22,-3.54
400,244,20.39,0.23,-3.02
450,244,20.37,0.23,-2.71

Unnamed: 0,Count,RMSE,R2,Bias
0,244,25.75,-0.23,-17.21
50,244,33.37,-1.07,-26.71
100,244,26.5,-0.31,-17.45
150,244,23.3,-0.01,-12.05
200,244,21.85,0.11,-8.89
250,244,21.18,0.17,-7.0
300,244,20.86,0.19,-5.87
350,244,20.7,0.2,-5.19
400,244,20.62,0.21,-4.78
450,244,20.58,0.21,-4.53


In [6]:
# Re-nest the predictions from preds[test][run][frame] to preds2[test][frame][run].
# The number of frames is taken from the first run of each test.
preds2 = [
    [
        [run_preds[frame_idx] for run_preds in test_preds]
        for frame_idx in range(len(test_preds[0]))
    ]
    for test_preds in preds
]

In [7]:
# Seed passed to generate_ensembles (as its random_seed argument) in the next cell.
random_seed = 46524

In [8]:
# ``y`` column of the last predictions frame that was loaded (the frame the loading
# loop leaves in ``preds_``). It is not used by the rest of this cell.
y = preds[-1][-1][-1].y
full_stats = []
num_ensembles = 100
ensemble_size = 10
epoch_step = 50     # spacing of the row labels given to consecutive entries of test_stats
for test_preds in preds2:
    ensembles, test_stats = generate_ensembles(
        test_preds, ensemble_runs=num_ensembles, ensemble_sizes=ensemble_size, random_seed=random_seed)
    # For every entry of test_stats, put the 'base' row of each of its frames side by
    # side (presumably one column per generated ensemble - confirm in model_utils).
    summ_stats = [pd.concat([x.loc['base'] for x in epoch_], axis=1) for epoch_ in test_stats]
    # Labels are derived from the number of entries (0, 50, 100, ...) so they always
    # match the data, instead of relying on pd.concat truncating a fixed-length
    # ``keys`` index.
    epoch_labels = pd.RangeIndex(0, epoch_step * len(summ_stats), epoch_step)
    # Average across the columns, then pivot to one row per label and one column per statistic.
    full_stats.append(pd.concat(summ_stats, keys=epoch_labels).mean(axis=1).unstack())

Generating ensembles - test 0: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 1: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 2: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 3: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 4: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 5: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 6: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100

In [9]:
# Show the ensemble statistics, one frame per test, shown with 2 decimal places. test_names[1:]
# is used because the first test was skipped when the predictions were loaded.
display_frames(full_stats, test_names[1:], precision=2)

Unnamed: 0,Count,RMSE,R2,Bias
0,244.0,34.06,-1.2,-26.31
50,244.0,32.19,-0.94,-23.59
100,244.0,27.75,-0.44,-16.67
150,244.0,25.1,-0.18,-11.57
200,244.0,24.21,-0.09,-8.95
250,244.0,23.9,-0.06,-7.5
300,244.0,23.73,-0.05,-6.5
350,244.0,23.63,-0.04,-5.87
400,244.0,23.73,-0.05,-5.56
450,244.0,23.73,-0.05,-5.18

Unnamed: 0,Count,RMSE,R2,Bias
0,244.0,34.06,-1.2,-26.31
50,244.0,33.15,-1.06,-24.51
100,244.0,27.88,-0.45,-16.53
150,244.0,25.27,-0.19,-11.16
200,244.0,24.29,-0.1,-8.53
250,244.0,23.85,-0.06,-7.04
300,244.0,23.68,-0.04,-6.14
350,244.0,23.49,-0.03,-5.44
400,244.0,23.63,-0.04,-5.13
450,244.0,23.48,-0.03,-4.81

Unnamed: 0,Count,RMSE,R2,Bias
0,244.0,34.06,-1.2,-26.31
50,244.0,34.9,-1.28,-26.8
100,244.0,28.78,-0.55,-17.98
150,244.0,25.37,-0.2,-11.76
200,244.0,24.1,-0.08,-8.55
250,244.0,23.61,-0.04,-6.78
300,244.0,23.34,-0.01,-5.64
350,244.0,23.17,-0.0005,-4.91
400,244.0,23.31,-0.01,-4.47
450,244.0,23.19,-0.0015,-4.24

Unnamed: 0,Count,RMSE,R2,Bias
0,244.0,34.06,-1.2,-26.31
50,244.0,38.12,-1.73,-31.13
100,244.0,30.37,-0.72,-20.59
150,244.0,26.05,-0.27,-13.04
200,244.0,24.29,-0.1,-8.79
250,244.0,23.67,-0.04,-6.31
300,244.0,23.32,-0.01,-4.65
350,244.0,23.05,0.01,-3.58
400,244.0,23.22,-0.0043,-3.03
450,244.0,23.12,0.0038,-2.56

Unnamed: 0,Count,RMSE,R2,Bias
0,244.0,27.16,-0.41,-17.1
50,244.0,32.52,-0.99,-25.14
100,244.0,27.07,-0.37,-17.69
150,244.0,23.97,-0.07,-11.97
200,244.0,22.64,0.04,-8.59
250,244.0,21.84,0.11,-6.34
300,244.0,21.47,0.14,-4.92
350,244.0,21.35,0.15,-4.06
400,244.0,21.26,0.16,-3.44
450,244.0,21.23,0.16,-3.05

Unnamed: 0,Count,RMSE,R2,Bias
0,244.0,27.16,-0.41,-17.1
50,244.0,33.59,-1.12,-26.1
100,244.0,26.85,-0.35,-17.26
150,244.0,22.93,0.02,-10.78
200,244.0,21.54,0.14,-7.43
250,244.0,20.87,0.19,-5.18
300,244.0,20.62,0.21,-3.81
350,244.0,20.54,0.21,-3.09
400,244.0,20.57,0.21,-2.63
450,244.0,20.52,0.22,-2.17

Unnamed: 0,Count,RMSE,R2,Bias
0,244.0,27.16,-0.41,-17.1
50,244.0,33.32,-1.09,-26.01
100,244.0,27.12,-0.38,-17.76
150,244.0,23.4,-0.02,-11.39
200,244.0,22.12,0.09,-8.02
250,244.0,21.39,0.15,-5.76
300,244.0,21.08,0.17,-4.37
350,244.0,20.94,0.18,-3.63
400,244.0,20.9,0.19,-3.01
450,244.0,20.86,0.19,-2.66

Unnamed: 0,Count,RMSE,R2,Bias
0,244.0,27.16,-0.41,-17.1
50,244.0,33.97,-1.16,-26.68
100,244.0,27.22,-0.38,-17.67
150,244.0,23.79,-0.06,-11.9
200,244.0,22.65,0.04,-9.08
250,244.0,21.71,0.12,-6.99
300,244.0,21.43,0.14,-5.89
350,244.0,21.24,0.16,-5.23
400,244.0,21.07,0.17,-4.77
450,244.0,21.04,0.17,-4.46
