# Notebook to Analyse Results from CNN Training

In [2]:
# Load needed packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as pyplot
from sklearn.linear_model import LinearRegression

In [3]:
# Import result data and combine DF from both GDP metrics
# ("final" file = absolute GDP runs, "diff" file = relative GDP runs)
res_dat_abs = pd.read_csv("/Users/maxbehrens/Documents/Msc/Thesis/Data/results/cnn_results_final.csv")
print('Absolute DF: '+str(res_dat_abs.shape))
res_dat_diff = pd.read_csv("/Users/maxbehrens/Documents/Msc/Thesis/Data/results/cnn_results_diff.csv")
print('Relative DF: '+str(res_dat_diff.shape))
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement. ignore_index=True renumbers the rows
# 0..n-1, which is what the former reset_index(drop=True, inplace=True) did.
res_dat = pd.concat([res_dat_abs, res_dat_diff], ignore_index=True)
print('Combined DF: '+str(res_dat.shape))

Absolute DF: (75, 18)
Relative DF: (77, 18)
Combined DF: (152, 18)


# Filter by Validation MSE

## Results per Optimising Function

Exploratory only: these per-optimiser averages are not very representative.

In [5]:
# Mean validation MSE per (GDP metric, optimiser) pair, lowest first
(
    res_dat
    .groupby(['prediction','optim'])['val_mse']
    .mean()
    .reset_index()
    .sort_values('val_mse')
)

Unnamed: 0,prediction,optim,val_mse
3,nuts_diff,sgd,0.402534
7,nuts_value,sgd,0.682682
6,nuts_value,rmsprob,5.105038
2,nuts_diff,rmsprob,8.221484
4,nuts_value,adam,12.708447
0,nuts_diff,adam,14.079164
5,nuts_value,adamax,38.195111
1,nuts_diff,adamax,192.52399


## Model Type

Transfer models (`inception_model`, `mobile_model`) seem to work best: they have the lowest mean validation MSE for both GDP metrics.

In [7]:
# Mean validation MSE per (GDP metric, model type) pair, lowest first
(
    res_dat
    .groupby(['prediction','transfer'])['val_mse']
    .mean()
    .reset_index()
    .sort_values(['val_mse'])
)

Unnamed: 0,prediction,transfer,val_mse
0,nuts_diff,inception_model,0.241636
2,nuts_diff,mobile_model,0.295845
5,nuts_value,inception_model,0.409371
1,nuts_diff,m_model,0.434973
7,nuts_value,mobile_model,0.514573
6,nuts_value,m_model,0.542444
8,nuts_value,s_model,6.384526
3,nuts_diff,s_model,10.276953
9,nuts_value,xs_model,62.654977
4,nuts_diff,xs_model,259.02089


## Learning Rate

In [8]:
# Mean validation MSE per (GDP metric, learning rate) pair, lowest first
(
    res_dat
    .groupby(['prediction','lr'])['val_mse']
    .mean()
    .reset_index()
    .sort_values(['val_mse'])
)

Unnamed: 0,prediction,lr,val_mse
6,nuts_value,0.0001,1.753972
0,nuts_diff,1e-06,1.855402
4,nuts_value,1e-06,1.924946
5,nuts_value,1e-05,5.338823
1,nuts_diff,1e-05,8.728598
2,nuts_diff,0.0001,14.192922
7,nuts_value,0.001,55.335412
3,nuts_diff,0.001,201.255038


## Putting it all together

In [7]:
# Groupby all varied entities (Model, Optimiser, Learning Rate)
# The two metric columns are selected with a list ([[...]]): the former
# tuple-style ['val_mse','val_mae'] selection on a GroupBy was deprecated and
# raises in pandas 2.0.
(
    res_dat
    .groupby(['prediction','optim','transfer','lr'])[['val_mse','val_mae']]
    .mean()
    .reset_index()
    .sort_values(['val_mse'])
)

Unnamed: 0,prediction,optim,transfer,lr,val_mse,val_mae
10,nuts_diff,adam,mobile_model,0.00010,0.187100,0.314113
41,nuts_diff,rmsprob,inception_model,0.00010,0.196607,0.336306
48,nuts_diff,rmsprob,mobile_model,0.00001,0.197601,0.331402
6,nuts_diff,adam,m_model,0.00010,0.199481,0.339075
50,nuts_diff,rmsprob,mobile_model,0.00100,0.199883,0.329470
...,...,...,...,...,...,...
52,nuts_diff,rmsprob,s_model,0.00001,109.495255,3.313060
18,nuts_diff,adam,xs_model,0.00010,144.526199,6.704063
94,nuts_value,adam,xs_model,0.00100,221.912476,11.085407
114,nuts_value,adamax,xs_model,0.00100,696.742554,14.609852


## Get Minimum per Group to identify best config per model

In [9]:
# For every (GDP metric, model type) group, look up the row label with the
# lowest validation MSE, then show those full rows ordered by that MSE
best_row_labels = res_dat.groupby(['prediction','transfer'])['val_mse'].idxmin()
res_dat.loc[best_row_labels].sort_values('val_mse')

Unnamed: 0,date,no_layers,batch_size,epochs,res,dims,train_mse,train_mae,val_mse,val_mae,test_mse,test_mae,transfer,lr,optim,comment,type,prediction
141,2020-04-01,161,8,33,128,3,0.175943,0.258067,0.1871,0.314113,36107310.0,4426.158017,mobile_model,0.0001,adam,iterated nn,viirs_night,nuts_diff
128,2020-03-31,786,8,93,128,3,0.214842,0.141631,0.196607,0.336306,38737730.0,4529.964899,inception_model,0.0001,rmsprob,iterated nn,viirs_night,nuts_diff
109,2020-03-31,14,16,50,224,3,1.000012,0.402939,0.199481,0.339075,40916300.0,4837.077766,m_model,0.0001,adam,iterated nn,viirs_night,nuts_diff
77,2020-03-30,3,16,34,224,3,1.024195,0.402934,0.204136,0.335459,88541310.0,5736.178039,xs_model,1e-05,sgd,iterated nn,viirs_night,nuts_diff
99,2020-03-31,8,16,109,224,3,0.831297,0.382421,0.20428,0.333147,83647560.0,5341.10648,s_model,0.0001,rmsprob,iterated nn,viirs_night,nuts_diff
53,2020-03-31,786,8,34,224,3,0.167386,0.272442,0.340211,0.471769,179621400.0,11267.29664,inception_model,1e-05,rmsprob,iterated nn,viirs_night,nuts_value
31,2020-03-30,14,16,104,224,3,0.443608,0.497277,0.376948,0.506384,209253700.0,11950.476919,m_model,1e-05,sgd,iterated nn,viirs_night,nuts_value
2,2020-03-30,3,16,35,224,3,0.913748,0.596321,0.380223,0.503052,275852800.0,12802.620296,xs_model,1e-05,sgd,iterated nn,viirs_night,nuts_value
60,2020-03-31,161,8,49,224,3,0.210946,0.342324,0.393468,0.51583,173462700.0,11157.85445,mobile_model,0.0001,sgd,iterated nn,viirs_night,nuts_value
22,2020-03-30,8,16,42,224,3,1.000228,0.621141,0.402281,0.511368,163446500.0,10987.016705,s_model,0.001,rmsprob,iterated nn,viirs_night,nuts_value


## Detailed Country Analysis

In [9]:
# Per-country prediction accuracy for relative GDP, ordered by ascending MSE
relative_country = pd.read_csv(
    "/Users/maxbehrens/Documents/Msc/Thesis/Data/results/result_country_relative.csv"
).sort_values('mse')
relative_country.head()

Unnamed: 0,country,mae,mse,size
0,ME,513.48877,263670.7,3
1,BG,622.249832,580025.7,8
2,PL,818.868851,979665.4,4
3,SK,873.117889,1281159.0,4
4,CZ,1237.313156,1854997.0,8


In [10]:
# Per-country prediction accuracy for absolute GDP, ordered by ascending MSE
absolute_country = pd.read_csv(
    "/Users/maxbehrens/Documents/Msc/Thesis/Data/results/result_country_abs.csv"
).sort_values('mse')
absolute_country.head()

Unnamed: 0,country,mae,mse,size
0,HU,3537.720215,13982830.0,4
1,UK,4684.828823,35640750.0,28
2,ES,5281.241455,35879810.0,16
4,LT,6407.195312,50025420.0,4
3,FR,5832.748177,50855480.0,15


In [11]:
# Country code lookup table (semicolon-separated file)
country_codes = pd.read_csv(
    "/Users/maxbehrens/Documents/Msc/Thesis/Data/gdp_data/country_codes.csv",
    sep=';',
)
country_codes.head()

Unnamed: 0,Code,Country
0,BE,Belgium
1,BG,Bulgaria
2,CZ,Czechia
3,DK,Denmark
4,DE,Germany


In [13]:
# Read the absolute GDP predictions file
abs_preds_path = "/Users/maxbehrens/Documents/Msc/Thesis/Data/results/result_preds_abs.csv"
abs_preds = pd.read_csv(abs_preds_path)

## More analysis

In [14]:
# Standard deviation of the true test values per country, smallest first
country_std = (
    abs_preds
    .groupby('country')['test_true_vals']
    .std()
    .reset_index()
    .sort_values('test_true_vals')
    .rename(columns={'test_true_vals': 'std'})
)
country_std.head()

Unnamed: 0,country,std
12,ME,404.145188
14,PL,457.347424
17,SK,485.626743
1,BG,499.821397
18,TR,816.67517


In [15]:
# Mean of the true test values per country
country_mean = (
    abs_preds
    .groupby('country')['test_true_vals']
    .mean()
    .reset_index()
    .rename(columns={'test_true_vals': 'mean'})
)
country_mean.head()

Unnamed: 0,country,mean
0,AT,39800.0
1,BG,5487.5
2,CZ,14025.0
3,DE,39602.5
4,DK,38437.5


In [16]:
# Attach the per-country std and mean to both accuracy tables.
# Left joins keep every row of the accuracy table, whether or not a matching
# country exists in the stats frames.
relative_country_stats = (
    relative_country
    .merge(country_std, on='country', how='left')
    .merge(country_mean, on='country', how='left')
)
absolute_country_stats = (
    absolute_country
    .merge(country_std, on='country', how='left')
    .merge(country_mean, on='country', how='left')
)

In [17]:
# Inspect the first 20 rows of the merged relative-GDP table
relative_country_stats.iloc[:20]

Unnamed: 0,country,mae,mse,size,std,mean
0,ME,513.48877,263670.7,3,404.145188,5966.666667
1,BG,622.249832,580025.7,8,499.821397,5487.5
2,PL,818.868851,979665.4,4,457.347424,9175.0
3,SK,873.117889,1281159.0,4,485.626743,13775.0
4,CZ,1237.313156,1854997.0,8,1020.853984,14025.0
5,PT,1305.35199,2543652.0,16,1603.004991,16718.75
6,TR,1733.978996,3923786.0,16,816.67517,6781.25
7,LV,1780.246948,4030403.0,4,898.146239,12700.0
8,RO,2120.458679,5980381.0,8,1640.502972,6562.5
9,NL,2181.553141,8645790.0,12,3233.384098,40025.0


In [124]:
# Write both merged country tables to csv (row index omitted)
csv_targets = [
    (relative_country_stats, "/Users/maxbehrens/Documents/Msc/Thesis/Data/results/result_relative_country_std.csv"),
    (absolute_country_stats, "/Users/maxbehrens/Documents/Msc/Thesis/Data/results/result_absolute_country_std.csv"),
]
for stats_frame, csv_path in csv_targets:
    stats_frame.to_csv(csv_path, index=False)

## Convert results into latex table output

In [127]:
# For Absolute GDP Predictions
# Prints one LaTeX table row per country, ordered by ascending MSE:
#   <country name> & mse & mae & size & mean & std \\ [1.1ex]
# Raw strings keep the LaTeX backslashes literal. The former "\m" and "\ "
# sequences in normal strings are invalid escapes (DeprecationWarning, and a
# SyntaxWarning from Python 3.12). The printed text is unchanged.
latex_columns = ['mse', 'mae', 'size', 'mean', 'std']
for index, row in absolute_country_stats.sort_values('mse').iterrows():
    # First matching full name for this row's country code
    country_name = country_codes.loc[country_codes['Code']==row['country'],'Country'].values[0]
    # Each numeric value is passed through round() and wrapped in a centred cell
    cells = [r"\multicolumn{1}{c}{" + str(round(row[col])) + "}" for col in latex_columns]
    print(" & ".join([country_name] + cells) + r" \\ [1.1ex]")

Hungary & \multicolumn{1}{c}{13982832} & \multicolumn{1}{c}{3538} & \multicolumn{1}{c}{4} & \multicolumn{1}{c}{23700} & \multicolumn{1}{c}{1699} \\ [1.1ex]
United Kingdom & \multicolumn{1}{c}{35640754} & \multicolumn{1}{c}{4685} & \multicolumn{1}{c}{28} & \multicolumn{1}{c}{28746} & \multicolumn{1}{c}{3714} \\ [1.1ex]
Spain & \multicolumn{1}{c}{35879807} & \multicolumn{1}{c}{5281} & \multicolumn{1}{c}{16} & \multicolumn{1}{c}{20525} & \multicolumn{1}{c}{3438} \\ [1.1ex]
Lithuania & \multicolumn{1}{c}{50025422} & \multicolumn{1}{c}{6407} & \multicolumn{1}{c}{4} & \multicolumn{1}{c}{19425} & \multicolumn{1}{c}{1394} \\ [1.1ex]
France & \multicolumn{1}{c}{50855480} & \multicolumn{1}{c}{5833} & \multicolumn{1}{c}{15} & \multicolumn{1}{c}{27313} & \multicolumn{1}{c}{3474} \\ [1.1ex]
Italy & \multicolumn{1}{c}{58833615} & \multicolumn{1}{c}{6635} & \multicolumn{1}{c}{12} & \multicolumn{1}{c}{22233} & \multicolumn{1}{c}{6383} \\ [1.1ex]
Portugal & \multicolumn{1}{c}{62129150} & \multicolumn{1

In [128]:
# For relative GDP predictions
for index, row in relative_country_stats.sort_values('mse').iterrows():
    print(country_codes.loc[country_codes['Code']==row['country'],'Country'].values[0]+" & \multicolumn{1}{c}{"+str(round(row['mse']))+"} & \multicolumn{1}{c}{"+str(round(row['mae']))+"} & \multicolumn{1}{c}{"+str(round(row['size']))+"} & \multicolumn{1}{c}{"+str(round(row['mean']))+"} & \multicolumn{1}{c}{"+str(round(row['std']))+"} \\\ [1.1ex]")

Montenegro & \multicolumn{1}{c}{263671} & \multicolumn{1}{c}{513} & \multicolumn{1}{c}{3} & \multicolumn{1}{c}{5967} & \multicolumn{1}{c}{404} \\ [1.1ex]
Bulgaria & \multicolumn{1}{c}{580026} & \multicolumn{1}{c}{622} & \multicolumn{1}{c}{8} & \multicolumn{1}{c}{5488} & \multicolumn{1}{c}{500} \\ [1.1ex]
Poland & \multicolumn{1}{c}{979665} & \multicolumn{1}{c}{819} & \multicolumn{1}{c}{4} & \multicolumn{1}{c}{9175} & \multicolumn{1}{c}{457} \\ [1.1ex]
Slovakia & \multicolumn{1}{c}{1281159} & \multicolumn{1}{c}{873} & \multicolumn{1}{c}{4} & \multicolumn{1}{c}{13775} & \multicolumn{1}{c}{486} \\ [1.1ex]
Czechia & \multicolumn{1}{c}{1854997} & \multicolumn{1}{c}{1237} & \multicolumn{1}{c}{8} & \multicolumn{1}{c}{14025} & \multicolumn{1}{c}{1021} \\ [1.1ex]
Portugal & \multicolumn{1}{c}{2543652} & \multicolumn{1}{c}{1305} & \multicolumn{1}{c}{16} & \multicolumn{1}{c}{16719} & \multicolumn{1}{c}{1603} \\ [1.1ex]
Turkey & \multicolumn{1}{c}{3923786} & \multicolumn{1}{c}{1734} & \multicolumn