In [None]:
import argparse
import os
import pandas as pd
import tensorflow as tf
import numpy as np
import datetime
import time
import logging
from joblib import dump, load
from datetime import timedelta
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model
from sklearn import svm
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import RegressorChain
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
import xgboost as xgb
import lightgbm as lightgbm

# settings (both values must match the ones used by the data preparator):
numberOfInputWeeks = 3   # number of input weeks per sample
numberOfOutputWeeks = 4  # number of predicted weeks per sample


# data preparation
cantonKeys = [
    'AG', 'AI', 'AR', 'BE', 'BL', 'BS', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE',
    'NW', 'OW', 'SG', 'SH', 'SO', 'SZ', 'TG', 'TI', 'UR', 'VD', 'VS', 'ZG', 'ZH',
]
data = pd.read_csv("completedata.csv")

# quantities the models predict; each has one label column per output week
outputCategories = [
    'case_inz_entries_7dayAverage',
    'hosp_inz_entries_7dayAverage',
    'death_inz_entries_7dayAverage',
    'testPositvity_7dayAverage',
    'transit_stations_percent_change_from_baseline_7dayAverage',
    'workplaces_percent_change_from_baseline_7dayAverage',
]

# every row starts out as training data ...
data[['category']] = 'train'

# ... and rows whose last input day falls into one of these (inclusive) windows
# are re-labelled as test data:
#   * 'test 1': prediction interval 29.5.21 to 25.6.21, i.e. 3 decreasing weeks
#     and 1 plateau (valley) week (fourth wave)
#   * 'test 2': prediction interval 12.10.20 to 26.10.20, i.e. 2 increasing
#     weeks (second wave)
#   * 'test 2': prediction interval 2.12.20 to 16.12.20, i.e. 1 increasing and
#     1 plateau (mountain) week (third wave)
# NOTE(review): the third-wave window shares the label 'test 2' with the
# second-wave window, so both are evaluated together - confirm this is intended.
testWindows = [
    ('2021-05-01', '2021-05-28', 'test 1'),
    ('2020-09-28', '2020-10-11', 'test 2'),
    ('2020-11-18', '2020-12-01', 'test 2'),
]
for windowStart, windowEnd, windowLabel in testWindows:
    insideWindow = data['lastInputDay'].between(windowStart, windowEnd)
    data.loc[insideWindow, 'category'] = windowLabel

# report the absolute and relative size of every subset
completeNumber = len(data)
trainNumber = int((data['category'] == 'train').sum())
test1Number = int((data['category'] == 'test 1').sum())
test2Number = int((data['category'] == 'test 2').sum())

print(f"Complete: {completeNumber}")
print(f"Training set: {trainNumber}({trainNumber/completeNumber})")
print(f"Test set 1 (decreasing): {test1Number}({test1Number/completeNumber})")
print(f"Test set 2 (increasing): {test2Number}({test2Number/completeNumber})")


# The last `split` columns are not features: one label column per output
# category and output week, followed by two more trailing columns.
numberOfOutputs = len(outputCategories)
split = numberOfOutputWeeks * numberOfOutputs + 2

# shape of a single feature vector (all feature columns except 'lastInputDay')
inputShape = (data.iloc[:, :-split].drop(columns='lastInputDay').shape[1],)

    
def generic1(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Return a compiled Sequential network with one hidden ReLU layer.

    The hidden Dense layer has ``hiddenLayers[0]`` units, takes ``inputShape``
    as its input shape, uses an L1/L2 kernel regularizer built from ``l1`` and
    ``l2`` and is followed by Dropout with rate ``dropout``. The output Dense
    layer has ``numberOfOutputWeeks`` units when ``isMultiOutput`` is true and
    one unit otherwise. The model is compiled with mean squared error loss and
    an Adam optimizer using ``learningrate``.
    """
    net = keras.models.Sequential()

    hiddenLayer = keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=inputShape,
    )
    net.add(hiddenLayer)
    net.add(keras.layers.Dropout(rate=dropout))

    outputUnits = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputUnits))

    optimizer = keras.optimizers.Adam(learning_rate=learningrate)
    net.compile(loss="mean_squared_error", optimizer=optimizer)
    return net
    
def generic2(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Return a compiled Sequential network with two hidden ReLU layers.

    Hidden layer ``i`` is a Dense layer with ``hiddenLayers[i]`` units and an
    L1/L2 kernel regularizer built from ``l1`` and ``l2``; each one is followed
    by Dropout with rate ``dropout``. Only the first layer is given
    ``inputShape``. The output Dense layer has ``numberOfOutputWeeks`` units
    when ``isMultiOutput`` is true and one unit otherwise. The model is
    compiled with mean squared error loss and Adam using ``learningrate``.
    """
    depth = 2
    net = keras.models.Sequential()
    for position in range(depth):
        # only the first layer declares the input shape
        firstLayerArgs = {"input_shape": inputShape} if position == 0 else {}
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
            **firstLayerArgs,
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputUnits = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputUnits))
    net.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return net

def generic3(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Return a compiled Sequential network with three hidden ReLU layers.

    Hidden layer ``i`` is a Dense layer with ``hiddenLayers[i]`` units and an
    L1/L2 kernel regularizer built from ``l1`` and ``l2``; each one is followed
    by Dropout with rate ``dropout``. Only the first layer is given
    ``inputShape``. The output Dense layer has ``numberOfOutputWeeks`` units
    when ``isMultiOutput`` is true and one unit otherwise. The model is
    compiled with mean squared error loss and Adam using ``learningrate``.
    """
    depth = 3
    net = keras.models.Sequential()
    for position in range(depth):
        # only the first layer declares the input shape
        firstLayerArgs = {"input_shape": inputShape} if position == 0 else {}
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
            **firstLayerArgs,
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputUnits = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputUnits))
    net.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return net

def generic4(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Return a compiled Sequential network with four hidden ReLU layers.

    Hidden layer ``i`` is a Dense layer with ``hiddenLayers[i]`` units and an
    L1/L2 kernel regularizer built from ``l1`` and ``l2``; each one is followed
    by Dropout with rate ``dropout``. Only the first layer is given
    ``inputShape``. The output Dense layer has ``numberOfOutputWeeks`` units
    when ``isMultiOutput`` is true and one unit otherwise. The model is
    compiled with mean squared error loss and Adam using ``learningrate``.
    """
    depth = 4
    net = keras.models.Sequential()
    for position in range(depth):
        # only the first layer declares the input shape
        firstLayerArgs = {"input_shape": inputShape} if position == 0 else {}
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
            **firstLayerArgs,
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputUnits = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputUnits))
    net.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return net

def generic5(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Return a compiled Sequential network with five hidden ReLU layers.

    Hidden layer ``i`` is a Dense layer with ``hiddenLayers[i]`` units and an
    L1/L2 kernel regularizer built from ``l1`` and ``l2``; each one is followed
    by Dropout with rate ``dropout``. Only the first layer is given
    ``inputShape``. The output Dense layer has ``numberOfOutputWeeks`` units
    when ``isMultiOutput`` is true and one unit otherwise. The model is
    compiled with mean squared error loss and Adam using ``learningrate``.
    """
    depth = 5
    net = keras.models.Sequential()
    for position in range(depth):
        # only the first layer declares the input shape
        firstLayerArgs = {"input_shape": inputShape} if position == 0 else {}
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
            **firstLayerArgs,
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputUnits = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputUnits))
    net.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return net

def generic6(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Return a compiled Sequential network with six hidden ReLU layers.

    Hidden layer ``i`` is a Dense layer with ``hiddenLayers[i]`` units and an
    L1/L2 kernel regularizer built from ``l1`` and ``l2``; each one is followed
    by Dropout with rate ``dropout``. Only the first layer is given
    ``inputShape``. The output Dense layer has ``numberOfOutputWeeks`` units
    when ``isMultiOutput`` is true and one unit otherwise. The model is
    compiled with mean squared error loss and Adam using ``learningrate``.
    """
    depth = 6
    net = keras.models.Sequential()
    for position in range(depth):
        # only the first layer declares the input shape
        firstLayerArgs = {"input_shape": inputShape} if position == 0 else {}
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
            **firstLayerArgs,
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputUnits = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputUnits))
    net.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return net

def generic7(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Return a compiled Sequential network with seven hidden ReLU layers.

    Hidden layer ``i`` is a Dense layer with ``hiddenLayers[i]`` units and an
    L1/L2 kernel regularizer built from ``l1`` and ``l2``; each one is followed
    by Dropout with rate ``dropout``. Only the first layer is given
    ``inputShape``. The output Dense layer has ``numberOfOutputWeeks`` units
    when ``isMultiOutput`` is true and one unit otherwise. The model is
    compiled with mean squared error loss and Adam using ``learningrate``.
    """
    depth = 7
    net = keras.models.Sequential()
    for position in range(depth):
        # only the first layer declares the input shape
        firstLayerArgs = {"input_shape": inputShape} if position == 0 else {}
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
            **firstLayerArgs,
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputUnits = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputUnits))
    net.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return net

def generic8(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Return a compiled Sequential network with eight hidden ReLU layers.

    Hidden layer ``i`` is a Dense layer with ``hiddenLayers[i]`` units and an
    L1/L2 kernel regularizer built from ``l1`` and ``l2``; each one is followed
    by Dropout with rate ``dropout``. Only the first layer is given
    ``inputShape``. The output Dense layer has ``numberOfOutputWeeks`` units
    when ``isMultiOutput`` is true and one unit otherwise. The model is
    compiled with mean squared error loss and Adam using ``learningrate``.
    """
    depth = 8
    net = keras.models.Sequential()
    for position in range(depth):
        # only the first layer declares the input shape
        firstLayerArgs = {"input_shape": inputShape} if position == 0 else {}
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
            **firstLayerArgs,
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputUnits = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputUnits))
    net.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return net
    

    
def genericKerasModel(learningrate, hiddenLayers, dropout, l1regularization, l2regularization, isMultioutput):
    """Build and compile a fully connected Keras regression network.

    For every entry of ``hiddenLayers`` a ReLU Dense layer of that width with
    an L1/L2 kernel regularizer is added, followed by a Dropout layer. For one
    to eight entries this is the same layer sequence the fixed-depth builders
    ``generic1`` ... ``generic8`` produce; longer lists are built in full
    instead of being cut off after eight layers.

    Parameters:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence with the number of units of each hidden layer.
        dropout: rate of the Dropout layer placed after each hidden layer.
        l1regularization: L1 factor of the kernel regularizer.
        l2regularization: L2 factor of the kernel regularizer.
        isMultioutput: if true the output layer has ``numberOfOutputWeeks``
            units, otherwise a single unit.

    Returns:
        The Sequential model, compiled with mean squared error loss.

    Raises:
        ValueError: if ``hiddenLayers`` is empty.
    """
    if len(hiddenLayers) == 0:
        # previously this fell through to the eight-layer builder and failed
        # with an unrelated IndexError
        raise ValueError("hiddenLayers must contain at least one layer width")

    model = keras.models.Sequential()
    for layerIndex, units in enumerate(hiddenLayers):
        # only the first layer needs to be told the shape of the input
        inputArgs = {"input_shape": inputShape} if layerIndex == 0 else {}
        model.add(keras.layers.Dense(
            units,
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1regularization, l2=l2regularization),
            **inputArgs,
        ))
        model.add(keras.layers.Dropout(rate=dropout))

    model.add(keras.layers.Dense(numberOfOutputWeeks if isMultioutput else 1))
    model.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return model

In [None]:
# number of training/evaluation repetitions per task
numberOfIterations = 10

# The canton to evaluate is selected on the command line by its index into
# cantonKeys. For interactive use, skip the argument parsing and assign the
# index directly instead (e.g. cantonNumber = 0).
parser = argparse.ArgumentParser()
parser.add_argument("cantonNumber", help="computes result for specific canton",type=int)
args = parser.parse_args()
cantonNumber = args.cantonNumber
# Reject indices outside cantonKeys up front: a negative value would otherwise
# silently select a canton from the end of the list and a too large one would
# only fail later with an IndexError.
if not 0 <= cantonNumber < len(cantonKeys):
    parser.error("cantonNumber must be between 0 and " + str(len(cantonKeys) - 1))
# we make additional benchmark computations when we compute the results for canton 0



def _kerasTaskSettings(hiddenLayers, l1reg, alpha, dropoutValue, learningRate, epochs, task=None):
    """Assemble the settings dictionary of one multi-week Keras estimator.

    ``numberOfhiddenLayers`` is derived from the length of ``hiddenLayers``;
    the optional ``task`` entry is only stored when it is given.
    """
    settings = {
        "modelClass": "Keras",
        "isMultiWeek": True,
        "hiddenLayers": list(hiddenLayers),
        "numberOfhiddenLayers": len(hiddenLayers),
        "isTwoWay": False,
        "l1reg": l1reg,
        "alpha": alpha,
        "dropoutValue": dropoutValue,
        "learningRate": learningRate,
    }
    if task is not None:
        settings["task"] = task
    settings["epochs"] = epochs
    return settings


# selected model settings per prediction task, keyed by output category
estimators = {
    'case_inz_entries_7dayAverage': _kerasTaskSettings(
        [30, 20], l1reg=0.1, alpha=0.1, dropoutValue=0.5, learningRate=0.001, epochs=68),
    'hosp_inz_entries_7dayAverage': _kerasTaskSettings(
        [350, 175], l1reg=0.001, alpha=0, dropoutValue=0.3, learningRate=0.001, epochs=5),
    'death_inz_entries_7dayAverage': _kerasTaskSettings(
        [30], l1reg=0.001, alpha=0.01, dropoutValue=0.3, learningRate=0.001, epochs=14),
    'testPositvity_7dayAverage': _kerasTaskSettings(
        [175], l1reg=0, alpha=0.0001, dropoutValue=0.3, learningRate=0.0001, epochs=43),
    'transit_stations_percent_change_from_baseline_7dayAverage': _kerasTaskSettings(
        [175, 87, 40], l1reg=0, alpha=0.0001, dropoutValue=0.2, learningRate=0.0001, epochs=17),
    'workplaces_percent_change_from_baseline_7dayAverage': _kerasTaskSettings(
        [1000], l1reg=0.01, alpha=0.0001, dropoutValue=0, learningRate=0.001, epochs=23,
        task='workplaces_percent_change_from_baseline_7dayAverage'),
}


# Create the output root. exist_ok avoids the check-then-create race when the
# jobs for several cantons start at the same time.
os.makedirs('transferability', exist_ok=True)

    
# first we compute a benchmark
# Experiment 1 (seen case / base case)
# Train on the training rows of all cantons and validate on every canton
# separately. Only the job for canton 0 runs this benchmark, so each model is
# trained once and validated on all cantons within that single job.
if cantonNumber == 0:
    # training data, shared by all tasks (each task picks its own label columns)
    trainRows = data[data['category']=='train']
    train_features_all = trainRows.iloc[:,0:-split].drop(['lastInputDay'], axis=1)
    train_labels_all = trainRows.iloc[:,-split:-2]

    # the scaler only depends on the training features, so it is fitted once
    pip = Pipeline([('minmax_scaler', MinMaxScaler())])
    scaledTrainFeatures = pip.fit_transform(train_features_all.values)

    # scaled test features and labels of both test sets for every canton;
    # they do not change between tasks or iterations
    cantonTestSets = []
    for cantonId in cantonKeys:
        test1Rows = data[(data['category']=='test 1') & (data['cantonId']==cantonId)]
        test2Rows = data[(data['category']=='test 2') & (data['cantonId']==cantonId)]
        cantonTestSets.append((
            cantonId,
            pip.transform(test1Rows.iloc[:,0:-split].drop(['lastInputDay'], axis=1).values),
            test1Rows.iloc[:,-split:-2],
            pip.transform(test2Rows.iloc[:,0:-split].drop(['lastInputDay'], axis=1).values),
            test2Rows.iloc[:,-split:-2],
        ))

    # one dict per (task, iteration, canton, week). Rows are collected in a
    # list because DataFrame.append no longer exists in pandas 2.
    resultRows = []
    for task in outputCategories:
        settings = estimators[task]
        labelColumns = ["output_"+task+"_"+str(outputWeekNumber) for outputWeekNumber in range(0,numberOfOutputWeeks)]
        predictionColumns = ["pred_week_"+task+"_"+str(outputWeekNumber) for outputWeekNumber in range(0,numberOfOutputWeeks)]
        trainTargets = train_labels_all[labelColumns].values

        for iteration in range(0,numberOfIterations):
            # Build a fresh model for every iteration. Re-fitting one model
            # would continue from the previous weights, so iteration k would
            # have been trained for (k+1) * epochs instead of being a repetition.
            estimator = genericKerasModel(settings["learningRate"],
                                          settings["hiddenLayers"],
                                          settings["dropoutValue"],
                                          settings["l1reg"],
                                          settings["alpha"],
                                          settings["isMultiWeek"])
            estimator.fit(scaledTrainFeatures,
                          trainTargets,
                          batch_size=32,
                          epochs=settings["epochs"],
                          verbose=0)

            # compute and save results for every canton
            for cantonId, test1Features, test1_labels_specific, test2Features, test2_labels_specific in cantonTestSets:
                predictions1 = pd.DataFrame(estimator.predict(test1Features),
                                            index=test1_labels_specific.index,
                                            columns=predictionColumns)
                predictions2 = pd.DataFrame(estimator.predict(test2Features),
                                            index=test2_labels_specific.index,
                                            columns=predictionColumns)

                # root mean squared error per output week on both test sets
                for outputWeekNumber in range(0,numberOfOutputWeeks):
                    labelColumn = labelColumns[outputWeekNumber]
                    predictionColumn = predictionColumns[outputWeekNumber]
                    rmse1 = np.sqrt(mean_squared_error(test1_labels_specific[labelColumn], predictions1[predictionColumn]))
                    rmse2 = np.sqrt(mean_squared_error(test2_labels_specific[labelColumn], predictions2[predictionColumn]))
                    resultRows.append({'task': task,
                                       'week': outputWeekNumber,
                                       'model rmse 1': rmse1,
                                       'model rmse 2': rmse2,
                                       'trainedOn': 'all',
                                       'errorForCanton': cantonId,
                                       'iteration': iteration})

    resultsDf = pd.DataFrame(resultRows)
    # exist_ok avoids failing when the directory already exists
    os.makedirs('transferability/allCantonalTraining', exist_ok=True)
    resultsDf.to_csv('transferability/allCantonalTraining/all.csv', header=True, index=False)
    
    
    
# Experiment 2 (unseen case / how are our predictions for cantons we did not train on?)
# Train on the training rows of all cantons except the selected one and
# validate on the selected canton.
selectedCanton = cantonKeys[cantonNumber]

train_features_without = data[(data['category']=='train') & (data['cantonId']!=selectedCanton)].iloc[:,0:-split].drop(['lastInputDay'], axis=1)
train_labels_without = data[(data['category']=='train') & (data['cantonId']!=selectedCanton)].iloc[:,-split:-2]

# test features and labels of the selected canton
test1Rows = data[(data['category']=='test 1') & (data['cantonId']==selectedCanton)]
test2Rows = data[(data['category']=='test 2') & (data['cantonId']==selectedCanton)]
test1_features_cId = test1Rows.iloc[:,0:-split].drop(['lastInputDay'], axis=1)
test2_features_cId = test2Rows.iloc[:,0:-split].drop(['lastInputDay'], axis=1)
test1_labels_specific = test1Rows.iloc[:,-split:-2]
test2_labels_specific = test2Rows.iloc[:,-split:-2]

# the scaler only depends on the training features, so it is fitted once and
# the scaled arrays are reused for every task and iteration
pip = Pipeline([('minmax_scaler', MinMaxScaler())])
scaledTrainFeatures = pip.fit_transform(train_features_without.values)
scaledTest1Features = pip.transform(test1_features_cId.values)
scaledTest2Features = pip.transform(test2_features_cId.values)

# one dict per (task, iteration, week). Rows are collected in a list because
# DataFrame.append no longer exists in pandas 2.
resultRows = []
for task in outputCategories:
    settings = estimators[task]
    labelColumns = ["output_"+task+"_"+str(outputWeekNumber) for outputWeekNumber in range(0,numberOfOutputWeeks)]
    predictionColumns = ["pred_week_"+task+"_"+str(outputWeekNumber) for outputWeekNumber in range(0,numberOfOutputWeeks)]
    trainTargets = train_labels_without[labelColumns].values

    for iteration in range(0,numberOfIterations):
        # Build a fresh model for every iteration. Re-fitting one model would
        # continue from the previous weights, so iteration k would have been
        # trained for (k+1) * epochs instead of being a repetition.
        estimator = genericKerasModel(settings["learningRate"],
                                      settings["hiddenLayers"],
                                      settings["dropoutValue"],
                                      settings["l1reg"],
                                      settings["alpha"],
                                      settings["isMultiWeek"])
        estimator.fit(scaledTrainFeatures,
                      trainTargets,
                      batch_size=32,
                      epochs=settings["epochs"],
                      verbose=0)

        # compute model predictions
        predictions1 = pd.DataFrame(estimator.predict(scaledTest1Features),
                                    index=test1_labels_specific.index,
                                    columns=predictionColumns)
        predictions2 = pd.DataFrame(estimator.predict(scaledTest2Features),
                                    index=test2_labels_specific.index,
                                    columns=predictionColumns)

        # root mean squared error per output week on both test sets
        for outputWeekNumber in range(0,numberOfOutputWeeks):
            labelColumn = labelColumns[outputWeekNumber]
            predictionColumn = predictionColumns[outputWeekNumber]
            rmse1 = np.sqrt(mean_squared_error(test1_labels_specific[labelColumn], predictions1[predictionColumn]))
            rmse2 = np.sqrt(mean_squared_error(test2_labels_specific[labelColumn], predictions2[predictionColumn]))
            resultRows.append({'task': task,
                               'week': outputWeekNumber,
                               'model rmse 1': rmse1,
                               'model rmse 2': rmse2,
                               'trainedOn': 'allWithoutCantonI',
                               'errorForCanton': selectedCanton,
                               'iteration': iteration})

resultsDf = pd.DataFrame(resultRows)
# exist_ok avoids the check-then-create race between jobs of different cantons
os.makedirs('transferability/allWithoutCantonI', exist_ok=True)
resultsDf.to_csv('transferability/allWithoutCantonI/'+selectedCanton+'.csv', header=True, index=False)
     
    
    
    
# Experiment 3 (advantage from data of other regions / does data from other cantons help?)
# Train only on the training rows of the selected canton and validate on the
# same canton.
selectedCanton = cantonKeys[cantonNumber]

train_features_only = data[(data['category']=='train') & (data['cantonId']==selectedCanton)].iloc[:,0:-split].drop(['lastInputDay'], axis=1)
train_labels_only = data[(data['category']=='train') & (data['cantonId']==selectedCanton)].iloc[:,-split:-2]

# test features and labels of the selected canton
test1Rows = data[(data['category']=='test 1') & (data['cantonId']==selectedCanton)]
test2Rows = data[(data['category']=='test 2') & (data['cantonId']==selectedCanton)]
test1_features_cId = test1Rows.iloc[:,0:-split].drop(['lastInputDay'], axis=1)
test2_features_cId = test2Rows.iloc[:,0:-split].drop(['lastInputDay'], axis=1)
test1_labels_specific = test1Rows.iloc[:,-split:-2]
test2_labels_specific = test2Rows.iloc[:,-split:-2]

# the scaler only depends on the training features, so it is fitted once and
# the scaled arrays are reused for every task and iteration
pip = Pipeline([('minmax_scaler', MinMaxScaler())])
scaledTrainFeatures = pip.fit_transform(train_features_only.values)
scaledTest1Features = pip.transform(test1_features_cId.values)
scaledTest2Features = pip.transform(test2_features_cId.values)

# one dict per (task, iteration, week). Rows are collected in a list because
# DataFrame.append no longer exists in pandas 2.
resultRows = []
for task in outputCategories:
    settings = estimators[task]
    labelColumns = ["output_"+task+"_"+str(outputWeekNumber) for outputWeekNumber in range(0,numberOfOutputWeeks)]
    predictionColumns = ["pred_week_"+task+"_"+str(outputWeekNumber) for outputWeekNumber in range(0,numberOfOutputWeeks)]
    trainTargets = train_labels_only[labelColumns].values

    for iteration in range(0,numberOfIterations):
        # Build a fresh model for every iteration. Re-fitting one model would
        # continue from the previous weights, so iteration k would have been
        # trained for (k+1) * epochs instead of being a repetition.
        estimator = genericKerasModel(settings["learningRate"],
                                      settings["hiddenLayers"],
                                      settings["dropoutValue"],
                                      settings["l1reg"],
                                      settings["alpha"],
                                      settings["isMultiWeek"])
        estimator.fit(scaledTrainFeatures,
                      trainTargets,
                      batch_size=32,
                      epochs=settings["epochs"],
                      verbose=0)

        # compute model predictions
        predictions1 = pd.DataFrame(estimator.predict(scaledTest1Features),
                                    index=test1_labels_specific.index,
                                    columns=predictionColumns)
        predictions2 = pd.DataFrame(estimator.predict(scaledTest2Features),
                                    index=test2_labels_specific.index,
                                    columns=predictionColumns)

        # root mean squared error per output week on both test sets
        for outputWeekNumber in range(0,numberOfOutputWeeks):
            labelColumn = labelColumns[outputWeekNumber]
            predictionColumn = predictionColumns[outputWeekNumber]
            rmse1 = np.sqrt(mean_squared_error(test1_labels_specific[labelColumn], predictions1[predictionColumn]))
            rmse2 = np.sqrt(mean_squared_error(test2_labels_specific[labelColumn], predictions2[predictionColumn]))
            resultRows.append({'task': task,
                               'week': outputWeekNumber,
                               'model rmse 1': rmse1,
                               'model rmse 2': rmse2,
                               'trainedOn': 'onlyCantonI',
                               'errorForCanton': selectedCanton,
                               'iteration': iteration})

resultsDf = pd.DataFrame(resultRows)
# exist_ok avoids the check-then-create race between jobs of different cantons
os.makedirs('transferability/onlyCantonI', exist_ok=True)
resultsDf.to_csv('transferability/onlyCantonI/'+selectedCanton+'.csv', header=True, index=False)
    
    
    
    
    
    