In [1]:
import glob
import json
import os
import pandas as pd
import random

import initialise
import common
from analysis_utils import calc_statistics, sort_file_list
from display_utils import display_frames
from model_utils import generate_ensembles

In [2]:
# --- Notebook configuration ---
# Floating point precision for saved predictions.
precision = 3

# Root folder that holds the trained models.
# NOTE(review): this is a machine-specific absolute path; common.MODELS_DIR is
# the alternative the original author left commented out.
models_dir = r'Z:\LFMC_data\Transfer_learning\Models'
# Folder of the experiment analysed by this notebook.
model_dir = os.path.join(models_dir, 'europe_2013_architecture')

# Europe samples CSV inside the shared datasets directory.
samples_file_europe = os.path.join(common.DATASETS_DIR, 'europe_samples_365days.csv')

In [3]:
# Read the experiment definition stored in the model directory and collect the
# name of every test it lists.
experiment_path = os.path.join(model_dir, 'experiment.json')
with open(experiment_path, 'r') as experiment_file:
    experiment = json.load(experiment_file)

test_names = []
for test_cfg in experiment['tests']:
    test_names.append(test_cfg['testName'])
test_names  # bare last expression so the notebook echoes the list

['No augmentation; no target normalise; no topography',
 'Augmentation; no target normalise; no topography',
 'No augmentation; target normalise; no topography',
 'Augmentation; target normalise; no topography',
 'No augmentation; no target normalise; topography',
 'Augmentation; no target normalise; topography',
 'No augmentation; target normalise; topography',
 'Augmentation; target normalise; topography']

In [4]:
def _read_test_predicts(directory):
    """Load ``test_predicts.csv`` from ``directory``.

    The first CSV column becomes the index of the returned DataFrame.
    """
    return pd.read_csv(os.path.join(directory, 'test_predicts.csv'), index_col=0)


# Nested result: preds[test][run] is a list of prediction frames - one per
# epoch* sub-directory (in sort_file_list order), followed by the frame stored
# directly in the run directory.
preds = []
# Plain string patterns: the originals were f-strings with no placeholders.
test_dirs = sort_file_list(glob.glob(os.path.join(model_dir, 'test*')), 'test')
for test_dir in test_dirs:
    test_preds = []
    run_dirs = sort_file_list(glob.glob(os.path.join(test_dir, 'run*')), 'run')
    for run_dir in run_dirs:
        epoch_dirs = sort_file_list(glob.glob(os.path.join(run_dir, 'epoch*')), 'epoch')
        run_preds = [_read_test_predicts(epoch_dir) for epoch_dir in epoch_dirs]
        # Keep the name preds_ bound to the most recently loaded run-level
        # frame: the statistics cells further down read preds_.y from it.
        preds_ = _read_test_predicts(run_dir)
        run_preds.append(preds_)
        test_preds.append(run_preds)
    preds.append(test_preds)

In [5]:
# The y column of the last predictions frame loaded above (presumably the
# observed values - confirm against the CSV layout).
y = preds_.y
# Labels attached to the frames of each run.
# NOTE(review): assumes every run holds exactly ten frames, one per 50 epochs
# up to 500 - confirm against the saved checkpoints.
epoch_keys = pd.RangeIndex.from_range(range(50, 501, 50))

stats_dfs = []
for test_preds in preds:
    # One column per run: the 'base' predictions of each frame, stacked under
    # the frame's epoch label.
    run_columns = []
    for run_frames in test_preds:
        base_by_epoch = [frame.base for frame in run_frames]
        run_columns.append(pd.concat(base_by_epoch, keys=epoch_keys))
    all_runs = pd.concat(run_columns, axis=1, ignore_index=True)
    # Average across the runs, then pivot so that each row is one epoch label.
    summ_ = all_runs.mean(axis=1).unstack()

    # Statistics of the averaged prediction at every epoch label.
    stats_ = {}
    for epochs, yhat in summ_.iterrows():
        stats_[epochs] = calc_statistics(y, yhat[y.index])
    summ_stats = pd.DataFrame.from_dict(stats_, orient='index')
    stats_dfs.append(summ_stats)
display_frames(stats_dfs, test_names, precision=2)

Unnamed: 0,Count,RMSE,R2,Bias
50,244,29.47,-0.62,-21.18
100,244,21.92,0.1,-6.9
150,244,21.57,0.13,-5.06
200,244,21.37,0.15,-3.82
250,244,20.96,0.18,-2.61
300,244,20.85,0.19,-2.01
350,244,20.86,0.19,-0.79
400,244,20.72,0.2,-0.26
450,244,20.7,0.2,0.2
500,244,20.72,0.2,0.66

Unnamed: 0,Count,RMSE,R2,Bias
50,244,30.76,-0.76,-23.53
100,244,21.11,0.17,-5.83
150,244,20.94,0.18,-3.8
200,244,20.83,0.19,-2.79
250,244,20.73,0.2,-1.87
300,244,20.78,0.2,-0.23
350,244,20.84,0.19,0.21
400,244,20.92,0.19,0.96
450,244,20.85,0.19,1.6
500,244,20.82,0.19,1.72

Unnamed: 0,Count,RMSE,R2,Bias
50,244,21.6,0.13,-6.0
100,244,21.23,0.16,-4.75
150,244,21.27,0.16,-4.64
200,244,21.33,0.15,-4.65
250,244,21.34,0.15,-4.54
300,244,21.33,0.15,-4.61
350,244,21.35,0.15,-4.6
400,244,21.38,0.15,-4.68
450,244,21.39,0.15,-4.66
500,244,21.37,0.15,-4.73

Unnamed: 0,Count,RMSE,R2,Bias
50,244,20.16,0.24,-4.2
100,244,20.01,0.25,-3.53
150,244,19.98,0.26,-3.5
200,244,20.0,0.25,-3.6
250,244,20.04,0.25,-3.51
300,244,20.09,0.25,-3.59
350,244,20.19,0.24,-3.67
400,244,20.22,0.24,-3.72
450,244,20.32,0.23,-3.61
500,244,20.3,0.23,-3.68

Unnamed: 0,Count,RMSE,R2,Bias
50,244,29.77,-0.65,-21.3
100,244,22.25,0.08,-6.2
150,244,21.68,0.12,-3.52
200,244,21.6,0.13,-2.06
250,244,21.62,0.13,-1.54
300,244,21.59,0.13,-0.27
350,244,21.42,0.15,-0.11
400,244,21.51,0.14,1.08
450,244,21.49,0.14,1.12
500,244,21.32,0.15,1.29

Unnamed: 0,Count,RMSE,R2,Bias
50,244,28.6,-0.52,-20.58
100,244,21.42,0.15,-4.82
150,244,21.02,0.18,-1.85
200,244,20.85,0.19,-0.81
250,244,20.96,0.18,-0.07
300,244,20.91,0.19,0.57
350,244,20.92,0.19,1.47
400,244,21.01,0.18,1.66
450,244,20.98,0.18,2.08
500,244,20.99,0.18,2.1

Unnamed: 0,Count,RMSE,R2,Bias
50,244,21.13,0.17,-3.86
100,244,21.0,0.18,-3.55
150,244,21.12,0.17,-3.65
200,244,21.31,0.15,-3.9
250,244,21.41,0.15,-3.99
300,244,21.49,0.14,-4.27
350,244,21.49,0.14,-3.97
400,244,21.58,0.13,-4.12
450,244,21.59,0.13,-4.09
500,244,21.58,0.13,-4.15

Unnamed: 0,Count,RMSE,R2,Bias
50,244,20.25,0.24,-2.79
100,244,20.26,0.24,-2.49
150,244,20.35,0.23,-2.79
200,244,20.4,0.23,-2.82
250,244,20.61,0.21,-3.04
300,244,20.67,0.2,-3.08
350,244,20.8,0.19,-3.38
400,244,20.8,0.19,-3.14
450,244,20.88,0.19,-3.43
500,244,20.89,0.19,-3.46


In [6]:
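# NOTE: disabled code, kept for reference only. It is a variant of the loader in In [4] (limited to test_dirs[1:12]) that also performs the run/epoch re-nesting now done in In [7].
# preds = []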
# test_dirs = sort_file_list(glob.glob(os.path.join(model_dir, f'test*')), 'test')
# for test_dir in test_dirs[1:12]:
#     test_preds = []
#     run_dirs = sort_file_list(glob.glob(os.path.join(test_dir, f'run*')), 'run')
#     for run_dir in run_dirs:
#         run_preds = []
#         epoch_dirs = sort_file_list(glob.glob(os.path.join(run_dir, f'epoch*')), 'epoch')
#         for epoch_dir in epoch_dirs:
#             preds_ = pd.read_csv(os.path.join(epoch_dir, 'test_predicts.csv'), index_col=0)
#             run_preds.append(preds_)
#         preds_ = pd.read_csv(os.path.join(run_dir, 'test_predicts.csv'), index_col=0)
#         run_preds.append(preds_)
#         test_preds.append(run_preds)
#     temp = []
#     for epoch in range(len(test_preds[0])):
#         temp.append([])
#         for run in range(len(test_preds)):
#             temp[-1].append(test_preds[run][epoch])
#     preds.append(temp)

In [7]:
# Re-nest every test from [run][epoch] order into [epoch][run] order.
preds2 = []
for test_preds in preds:
    num_epochs = len(test_preds[0])  # the first run fixes the number of epoch slots
    by_epoch = [[run_frames[epoch_idx] for run_frames in test_preds]
                for epoch_idx in range(num_epochs)]
    preds2.append(by_epoch)

In [8]:
# Seed passed to generate_ensembles (as random_seed) in the ensemble cell below;
# presumably it makes the random ensemble selection repeatable - confirm in model_utils.
random_seed = 46524

In [9]:
y = preds_.y
num_ensembles = 100
ensemble_size = 10
# Row labels for the per-epoch statistics.
# NOTE(review): assumes ten epoch slots per test (50, 100, ..., 500) - confirm
# against the saved checkpoints.
epoch_keys = pd.RangeIndex.from_range(range(50, 501, 50))

full_stats = []
for test_preds in preds2:
    ensembles, test_stats = generate_ensembles(
        test_preds,
        ensemble_runs=num_ensembles,
        ensemble_sizes=ensemble_size,
        random_seed=random_seed,
    )
    # For each epoch slot, put the 'base' row of every returned statistics
    # frame side by side as columns.
    summ_stats = []
    for epoch_ in test_stats:
        base_rows = [ensemble_stats.loc['base'] for ensemble_stats in epoch_]
        summ_stats.append(pd.concat(base_rows, axis=1))
    # Stack the epoch slots, average across the columns, and pivot to one row
    # per epoch label.
    stacked = pd.concat(summ_stats, keys=epoch_keys)
    full_stats.append(stacked.mean(axis=1).unstack())

Generating ensembles - test 0: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 1: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 2: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 3: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 4: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 5: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100
Generating ensembles - test 6: .........10.........20.........30.........40.........50.........60.........70.........80.........90.........100

In [11]:
# Show the averaged ensemble statistics collected in full_stats together with
# test_names (display_frames is a project helper; presumably one table per test
# rounded to 2 decimals - confirm in display_utils).
display_frames(full_stats, test_names, precision=2)

Unnamed: 0,Count,RMSE,R2,Bias
50,244.0,29.56,-0.63,-21.08
100,244.0,22.27,0.08,-6.9
150,244.0,21.92,0.1,-5.15
200,244.0,21.65,0.13,-3.84
250,244.0,21.2,0.16,-2.52
300,244.0,21.15,0.17,-1.91
350,244.0,21.18,0.16,-0.7
400,244.0,21.01,0.18,-0.28
450,244.0,21.04,0.18,0.15
500,244.0,21.12,0.17,0.61

Unnamed: 0,Count,RMSE,R2,Bias
50,244.0,30.88,-0.78,-23.48
100,244.0,21.37,0.15,-5.7
150,244.0,21.25,0.16,-3.77
200,244.0,21.1,0.17,-2.71
250,244.0,21.01,0.18,-1.95
300,244.0,21.05,0.17,-0.21
350,244.0,21.1,0.17,0.19
400,244.0,21.12,0.17,0.94
450,244.0,21.2,0.16,1.48
500,244.0,21.22,0.16,1.82

Unnamed: 0,Count,RMSE,R2,Bias
50,244.0,21.77,0.12,-5.88
100,244.0,21.51,0.14,-4.79
150,244.0,21.64,0.13,-4.73
200,244.0,21.73,0.12,-4.75
250,244.0,21.7,0.12,-4.51
300,244.0,21.72,0.12,-4.66
350,244.0,21.67,0.12,-4.65
400,244.0,21.63,0.13,-4.64
450,244.0,21.69,0.12,-4.56
500,244.0,21.69,0.12,-4.72

Unnamed: 0,Count,RMSE,R2,Bias
50,244.0,20.37,0.23,-4.16
100,244.0,20.3,0.23,-3.58
150,244.0,20.36,0.23,-3.63
200,244.0,20.37,0.23,-3.67
250,244.0,20.4,0.23,-3.45
300,244.0,20.47,0.22,-3.59
350,244.0,20.55,0.21,-3.72
400,244.0,20.47,0.22,-3.67
450,244.0,20.63,0.21,-3.57
500,244.0,20.66,0.2,-3.73

Unnamed: 0,Count,RMSE,R2,Bias
50,244.0,29.84,-0.66,-21.16
100,244.0,22.52,0.06,-6.18
150,244.0,21.99,0.1,-3.6
200,244.0,21.91,0.11,-1.99
250,244.0,21.88,0.11,-1.52
300,244.0,21.9,0.11,-0.24
350,244.0,21.74,0.12,-0.08
400,244.0,21.81,0.11,1.1
450,244.0,21.75,0.12,1.13
500,244.0,21.75,0.12,1.29

Unnamed: 0,Count,RMSE,R2,Bias
50,244.0,28.72,-0.54,-20.52
100,244.0,21.7,0.12,-4.75
150,244.0,21.31,0.15,-1.85
200,244.0,21.08,0.17,-0.82
250,244.0,21.21,0.16,-0.14
300,244.0,21.13,0.17,0.61
350,244.0,21.17,0.17,1.44
400,244.0,21.25,0.16,1.75
450,244.0,21.2,0.16,1.96
500,244.0,21.35,0.15,2.1

Unnamed: 0,Count,RMSE,R2,Bias
50,244.0,21.3,0.15,-3.83
100,244.0,21.26,0.16,-3.61
150,244.0,21.5,0.14,-3.76
200,244.0,21.65,0.13,-3.9
250,244.0,21.71,0.12,-3.91
300,244.0,21.88,0.11,-4.3
350,244.0,21.82,0.11,-3.98
400,244.0,21.8,0.11,-4.07
450,244.0,21.93,0.1,-4.07
500,244.0,21.91,0.11,-4.13

Unnamed: 0,Count,RMSE,R2,Bias
50,244.0,20.43,0.22,-2.74
100,244.0,20.55,0.21,-2.52
150,244.0,20.69,0.2,-2.91
200,244.0,20.73,0.2,-2.79
250,244.0,20.93,0.18,-2.92
300,244.0,21.03,0.18,-3.07
350,244.0,21.13,0.17,-3.43
400,244.0,21.08,0.17,-3.06
450,244.0,21.11,0.17,-3.45
500,244.0,21.17,0.17,-3.47
