In [1]:
import argparse
import os
import pandas as pd
import tensorflow as tf
import numpy as np
import datetime
import time
import logging
from joblib import dump, load
from datetime import timedelta
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model
from sklearn import svm
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_squared_error
from sklearn.multioutput import RegressorChain
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
import xgboost as xgb
import lightgbm as lightgbm

# settings:
# Both week counts must equal the values configured in the data preparator.
numberOfInputWeeks = 3
numberOfOutputWeeks = 4



# data preparation
cantonKeys = [
    'AG', 'AI', 'AR', 'BE', 'BL', 'BS', 'FR', 'GE', 'GL', 'GR', 'JU', 'LU', 'NE',
    'NW', 'OW', 'SG', 'SH', 'SO', 'SZ', 'TG', 'TI', 'UR', 'VD', 'VS', 'ZG', 'ZH',
]
data = pd.read_csv("completedata.csv")


outputCategories = [
    'case_inz_entries_7dayAverage',
    'hosp_inz_entries_7dayAverage',
    'death_inz_entries_7dayAverage',
    'testPositvity_7dayAverage',
    'transit_stations_percent_change_from_baseline_7dayAverage',
    'workplaces_percent_change_from_baseline_7dayAverage',
]
numberOfOutputs = len(outputCategories)

# Number of trailing columns that are not features: one label column per
# (output category, output week) pair plus the last two columns, which are
# used neither as features nor as labels.
split = numberOfOutputs * numberOfOutputWeeks + 2


def _featuresAndLabels(rowMask):
    """Return (features, labels) for the rows of `data` selected by `rowMask`.

    Features are all columns before the trailing `split` columns, without
    'lastInputDay'. Labels are the trailing `split` columns minus the last two.
    """
    subset = data[rowMask]
    features = subset.iloc[:, 0:-split].drop(['lastInputDay'], axis=1)
    labels = subset.iloc[:, -split:-2]
    return features, labels


_isValidation1 = data['category'] == 'validation 1'
_isValidation2 = data['category'] == 'validation 2'

train_features, train_labels = _featuresAndLabels(data['category'] == 'train')
validation1_features, validation1_labels = _featuresAndLabels(_isValidation1)
validation2_features, validation2_labels = _featuresAndLabels(_isValidation2)
validation1And2_features, validation1And2_labels = _featuresAndLabels(_isValidation1 | _isValidation2)

# The scaler is fitted on the training rows only and then applied to the
# validation splits, always selecting the training column order.
pip = Pipeline([('minmax_scaler', MinMaxScaler())])
_featureColumns = train_features.columns
X_train = pip.fit_transform(train_features[_featureColumns].values)
X_valid1 = pip.transform(validation1_features[_featureColumns].values)
X_valid2 = pip.transform(validation2_features[_featureColumns].values)
X_valid1And2 = pip.transform(validation1And2_features[_featureColumns].values)


def generic1(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Build and compile a dense network with one hidden ReLU layer.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence of layer widths; only index 0 is read.
        dropout: rate of the Dropout layer that follows the hidden layer.
        l1, l2: factors of the L1/L2 kernel regularizer on the hidden layer.
        isMultiOutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.
    """
    net = keras.models.Sequential()
    hiddenBlock = keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=X_train.shape[1:],
    )
    net.add(hiddenBlock)
    net.add(keras.layers.Dropout(rate=dropout))

    outputWidth = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputWidth))

    net.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
    )
    return net
    
def generic2(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Build and compile a dense network with two hidden ReLU layers.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence of layer widths; indices 0 and 1 are read.
        dropout: rate of the Dropout layer that follows every hidden layer.
        l1, l2: factors of the L1/L2 kernel regularizer on every hidden layer.
        isMultiOutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.
    """
    net = keras.models.Sequential()

    # First hidden layer: the only one that declares the input shape.
    firstHidden = keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=X_train.shape[1:],
    )
    net.add(firstHidden)
    net.add(keras.layers.Dropout(rate=dropout))

    secondHidden = keras.layers.Dense(
        hiddenLayers[1],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
    )
    net.add(secondHidden)
    net.add(keras.layers.Dropout(rate=dropout))

    outputWidth = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputWidth))

    net.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
    )
    return net

def generic3(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Build and compile a dense network with three hidden ReLU layers.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence of layer widths; indices 0 to 2 are read.
        dropout: rate of the Dropout layer that follows every hidden layer.
        l1, l2: factors of the L1/L2 kernel regularizer on every hidden layer.
        isMultiOutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.
    """
    net = keras.models.Sequential()

    # First hidden layer: the only one that declares the input shape.
    net.add(keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=X_train.shape[1:],
    ))
    net.add(keras.layers.Dropout(rate=dropout))

    # Remaining hidden layers, each with its own regularizer and dropout.
    for position in range(1, 3):
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputWidth = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputWidth))

    net.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
    )
    return net

def generic4(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Build and compile a dense network with four hidden ReLU layers.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence of layer widths; indices 0 to 3 are read.
        dropout: rate of the Dropout layer that follows every hidden layer.
        l1, l2: factors of the L1/L2 kernel regularizer on every hidden layer.
        isMultiOutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.
    """
    net = keras.models.Sequential()

    # First hidden layer: the only one that declares the input shape.
    net.add(keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=X_train.shape[1:],
    ))
    net.add(keras.layers.Dropout(rate=dropout))

    # Remaining hidden layers, each with its own regularizer and dropout.
    for position in range(1, 4):
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputWidth = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputWidth))

    net.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
    )
    return net

def generic5(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Build and compile a dense network with five hidden ReLU layers.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence of layer widths; indices 0 to 4 are read.
        dropout: rate of the Dropout layer that follows every hidden layer.
        l1, l2: factors of the L1/L2 kernel regularizer on every hidden layer.
        isMultiOutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.
    """
    net = keras.models.Sequential()

    # First hidden layer: the only one that declares the input shape.
    net.add(keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=X_train.shape[1:],
    ))
    net.add(keras.layers.Dropout(rate=dropout))

    # Remaining hidden layers, each with its own regularizer and dropout.
    for position in range(1, 5):
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputWidth = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputWidth))

    net.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
    )
    return net

def generic6(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Build and compile a dense network with six hidden ReLU layers.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence of layer widths; indices 0 to 5 are read.
        dropout: rate of the Dropout layer that follows every hidden layer.
        l1, l2: factors of the L1/L2 kernel regularizer on every hidden layer.
        isMultiOutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.
    """
    net = keras.models.Sequential()

    # First hidden layer: the only one that declares the input shape.
    net.add(keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=X_train.shape[1:],
    ))
    net.add(keras.layers.Dropout(rate=dropout))

    # Remaining hidden layers, each with its own regularizer and dropout.
    for position in range(1, 6):
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputWidth = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputWidth))

    net.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
    )
    return net

def generic7(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Build and compile a dense network with seven hidden ReLU layers.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence of layer widths; indices 0 to 6 are read.
        dropout: rate of the Dropout layer that follows every hidden layer.
        l1, l2: factors of the L1/L2 kernel regularizer on every hidden layer.
        isMultiOutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.
    """
    net = keras.models.Sequential()

    # First hidden layer: the only one that declares the input shape.
    net.add(keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=X_train.shape[1:],
    ))
    net.add(keras.layers.Dropout(rate=dropout))

    # Remaining hidden layers, each with its own regularizer and dropout.
    for position in range(1, 7):
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputWidth = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputWidth))

    net.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
    )
    return net

def generic8(learningrate, hiddenLayers, dropout, l1, l2, isMultiOutput):
    """Build and compile a dense network with eight hidden ReLU layers.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: sequence of layer widths; indices 0 to 7 are read.
        dropout: rate of the Dropout layer that follows every hidden layer.
        l1, l2: factors of the L1/L2 kernel regularizer on every hidden layer.
        isMultiOutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.
    """
    net = keras.models.Sequential()

    # First hidden layer: the only one that declares the input shape.
    net.add(keras.layers.Dense(
        hiddenLayers[0],
        activation="relu",
        kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        input_shape=X_train.shape[1:],
    ))
    net.add(keras.layers.Dropout(rate=dropout))

    # Remaining hidden layers, each with its own regularizer and dropout.
    for position in range(1, 8):
        net.add(keras.layers.Dense(
            hiddenLayers[position],
            activation="relu",
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1, l2=l2),
        ))
        net.add(keras.layers.Dropout(rate=dropout))

    outputWidth = numberOfOutputWeeks if isMultiOutput else 1
    net.add(keras.layers.Dense(outputWidth))

    net.compile(
        loss="mean_squared_error",
        optimizer=keras.optimizers.Adam(learning_rate=learningrate),
    )
    return net
    

    
def genericKerasModel(learningrate, hiddenLayers, dropout, l1regularization, l2regularization, isMultioutput):
    """Build and compile a dense Keras regression network of arbitrary depth.

    For every entry of `hiddenLayers` a ReLU Dense layer with an L1/L2 kernel
    regularizer is added, followed by a Dropout layer. This is the same
    architecture the fixed-depth builders produce for one to eight layers.
    Lists longer than eight entries are now honoured in full instead of being
    silently cut down to their first eight widths.

    Args:
        learningrate: learning rate passed to the Adam optimizer.
        hiddenLayers: non-empty sequence of hidden layer widths.
        dropout: rate of the Dropout layer that follows every hidden layer.
        l1regularization: L1 factor of the kernel regularizer.
        l2regularization: L2 factor of the kernel regularizer.
        isMultioutput: if truthy the output layer has `numberOfOutputWeeks`
            units, otherwise a single unit.

    Returns:
        The compiled Sequential model (mean squared error loss). The input
        shape is taken from the module-level `X_train`.

    Raises:
        ValueError: if `hiddenLayers` is empty.
    """
    if len(hiddenLayers) == 0:
        raise ValueError("hiddenLayers must contain at least one layer width")

    model = keras.models.Sequential()
    for layerIndex, units in enumerate(hiddenLayers):
        # Only the first layer declares the input shape; Keras infers the rest.
        layerOptions = {"input_shape": X_train.shape[1:]} if layerIndex == 0 else {}
        model.add(keras.layers.Dense(
            units,
            activation="relu",
            # A fresh regularizer instance per layer, as in the fixed-depth builders.
            kernel_regularizer=keras.regularizers.l1_l2(l1=l1regularization, l2=l2regularization),
            **layerOptions
        ))
        model.add(keras.layers.Dropout(rate=dropout))

    model.add(keras.layers.Dense(numberOfOutputWeeks if isMultioutput else 1))
    model.compile(loss="mean_squared_error", optimizer=keras.optimizers.Adam(learning_rate=learningrate))
    return model

In [2]:
# Registry of every estimator configuration. The position of a configuration
# in this list is its estimator id, so the order of the loops below matters.
estimators = []
numberOfEstimators = 0
results = pd.DataFrame()

# Value grids shared by several model families.
_linearAlphaGrid = [0.00001, 0.0001, 0.001, 0.01, 0.1, 0, 1, 10, 100]
_boostingPenaltyGrid = [0.01, 0.1, 0, 1, 10]
_treeCountGrid = [100, 500, 1000]
_multiWeekOptions = [True, False]
# NOTE(review): 'poly' and 'polynomial' are both listed; check whether they
# select the same kernel before removing either (removal shifts estimator ids).
_kernelGrid = ['linear', 'poly', 'polynomial', 'rbf', 'laplacian', 'sigmoid', 'cosine']

# Ridge first, then Lasso: same grid, with and without multi-week output.
for modelClass in ["Ridge", "Lasso"]:
    for alpha in _linearAlphaGrid:
        for isMultiWeek in _multiWeekOptions:
            estimators.append({"modelClass": modelClass, "isMultiWeek": isMultiWeek, "alpha": alpha})
    numberOfEstimators = len(estimators)

# KernelRidge: every kernel for every alpha / multi-week combination.
for alpha in _linearAlphaGrid:
    for isMultiWeek in _multiWeekOptions:
        for kernel in _kernelGrid:
            estimators.append({
                "modelClass": "KernelRidge",
                "isMultiWeek": isMultiWeek,
                "alpha": alpha,
                "kernel": kernel,
            })
numberOfEstimators = len(estimators)

# MultiTaskLasso is only registered as a multi-week model.
for alpha in _linearAlphaGrid:
    estimators.append({"modelClass": "MultiTaskLasso", "isMultiWeek": True, "alpha": alpha})
numberOfEstimators = len(estimators)

# Gradient boosting: LGBM first, then XGB, both single-week only.
for modelClass in ["LGBM", "XGB"]:
    for alpha in _boostingPenaltyGrid:
        for lamb in _boostingPenaltyGrid:
            for n_estimators in _treeCountGrid:
                estimators.append({
                    "modelClass": modelClass,
                    "isMultiWeek": False,
                    "n_estimators": n_estimators,
                    "alpha": alpha,
                    "lambda": lamb,
                })
    numberOfEstimators = len(estimators)

# Support vector regression, single-week only.
for C in [0.001, 0.01, 0.1, 1, 10, 100]:
    estimators.append({"modelClass": "SVR", "isMultiWeek": False, "C": C})
numberOfEstimators = len(estimators)

# Stochastic gradient descent, single-week only.
for alpha in [0.001, 0.01, 0.1, 1, 10, 100]:
    for learning_rate in ['invscaling', 'adaptive', 'optimal']:
        for penalty in ['l1', 'l2', 'elasticnet']:
            estimators.append({
                "modelClass": "SGD",
                "isMultiWeek": False,
                "alpha": alpha,
                "learning_rate": learning_rate,
                "penalty": penalty,
            })
numberOfEstimators = len(estimators)

# Random forest, with and without multi-week output.
for max_features in ["auto", 10, 100, 200]:
    for min_samples_split in [2, 3, 4]:
        for min_samples_leaf in [1, 2, 3]:
            for n_estimators in _treeCountGrid:
                for isMultiWeek in _multiWeekOptions:
                    estimators.append({
                        "modelClass": "RandomForrest",
                        "isMultiWeek": isMultiWeek,
                        "n_estimators": n_estimators,
                        "max_features": max_features,
                        "min_samples_split": min_samples_split,
                        "min_samples_leaf": min_samples_leaf,
                    })
numberOfEstimators = len(estimators)

print("Start Keras: "+str(len(estimators)))


def _kerasHiddenLayerGrid():
    """Return a fresh list of the hidden-layer width configurations to register."""
    return [
        # one hidden layer
        [20], [30], [40], [50], [60], [70], [80], [90], [100], [125],
        [150], [175], [200], [225], [250], [300], [350], [400], [500], [1000],
        # two hidden layers of equal width
        [100, 100], [125, 125], [175, 175], [200, 200], [225, 225],
        [250, 250], [350, 350], [400, 400], [500, 500], [1000, 1000],
        # two hidden layers, second one narrower
        [30, 15], [30, 20], [40, 20], [60, 30], [70, 35], [80, 40], [90, 45],
        [100, 50], [125, 63], [175, 87], [200, 100], [225, 112], [250, 125],
        [350, 175], [400, 200], [500, 250], [1000, 500],
        # three hidden layers
        [100, 50, 25], [100, 50, 50], [125, 63, 30], [175, 87, 40],
        [200, 100, 50], [225, 112, 56], [250, 125, 62], [350, 175, 85],
        [300, 150, 75], [400, 200, 100], [250, 100, 20], [400, 200, 50],
        [400, 100, 50], [500, 250, 125], [1000, 500, 250],
        # eight hidden layers
        [400, 200, 100, 90, 80, 70, 60, 50],
        [500, 250, 120, 100, 90, 80, 70, 60],
        [400, 300, 150, 120, 100, 70, 50, 30],
        [400, 100, 50, 50, 50, 50, 50, 50],
        [400, 200, 100, 50, 50, 50, 50, 50],
        [200, 150, 100, 50, 50, 50, 50, 50],
    ]


# adding keras model construction parameters
# (the loop nesting order fixes the estimator ids and must stay as it is)
for learningRate in [0.001, 0.0001]:
    for dropoutValue in [0, 0.2, 0.3, 0.5]:
        for l1reg in [0.0001, 0.001, 0.01, 0.1, 1, 0]:
            for alpha in [0.0001, 0.001, 0.01, 0.1, 1, 0]:
                for hiddenLayers in _kerasHiddenLayerGrid():
                    kerasConfiguration = {
                        "modelClass": "Keras",
                        "isMultiWeek": True,
                        "hiddenLayers": hiddenLayers,
                        "numberOfhiddenLayers": len(hiddenLayers),
                        "isTwoWay": False,
                        "l1reg": l1reg,
                        "alpha": alpha,
                        "dropoutValue": dropoutValue,
                        "learningRate": learningRate,
                    }
                    estimators.append(kerasConfiguration)
print("Keras: "+str(len(estimators)-numberOfEstimators))
numberOfEstimators = len(estimators)


print("Total: "+str(len(estimators)))
             


# Command line interface: the single positional argument selects which entry
# of `estimators` this run computes.
parser = argparse.ArgumentParser()
parser.add_argument("estimatorId", help="computes estimator with given id",type=int)
args = parser.parse_args()
estimatorId = args.estimatorId
# Reject ids outside the registry right away. Without this check a negative id
# silently selects an estimator counted from the end of the list, and a too
# large id only fails later with an IndexError.
if not 0 <= estimatorId < len(estimators):
    parser.error("estimatorId must be between 0 and "+str(len(estimators)-1)+", got "+str(estimatorId))


# Disabled smoke-test setup. Everything between the triple quotes below is a
# bare string literal and is never executed. It appends small configurations
# for the model classes registered above and hard-codes an estimatorId, so a
# single estimator can be tried without the command line argument. Remove the
# surrounding quotes to enable it.
'''



# Test
estimators.append({"modelClass": "Ridge","alpha": 10,"isMultiWeek": True})
estimators.append({"modelClass": "Ridge","alpha": 10,"isMultiWeek": False})
estimators.append({"modelClass": "Lasso","alpha": 10,"isMultiWeek": True})
estimators.append({"modelClass": "Lasso","alpha": 10,"isMultiWeek": False})
estimators.append({
                      "modelClass": "Keras",
                      "isMultiWeek": True,
                      "hiddenLayers": [10,10],
                      "numberOfhiddenLayers": 2,
                      "isTwoWay": True,
                      "l1reg": 0.001,
                      "alpha": 0.01,
                      "dropoutValue": 0.3,
                      "learningRate": 0.0001
                  }
                )
estimators.append({
                      "modelClass": "Keras",
                      "isMultiWeek": False,
                      "hiddenLayers": [10],
                      "numberOfhiddenLayers": 2,
                      "isTwoWay": True,
                      "l1reg": 0.001,
                      "alpha": 0.01,
                      "dropoutValue": 0.3,
                      "learningRate": 0.0001
                  }
                )
estimators.append(
                    {
                        "modelClass": "SGD",
                        "alpha": 10,
                        "learning_rate": 'invscaling',
                        "penalty": 'l2',
                        "isMultiWeek": False    
                    }
                )
estimators.append(
            {
              "modelClass": "MultiTaskLasso",
              "alpha": 10,
              "isMultiWeek": True
            })
estimators.append(
                    {
                        "modelClass": "SVR",
                        "C": 10,
                        "isMultiWeek": False
                    }
                )

estimators.append(
                    {
                        "modelClass": "RandomForrest",
                        "n_estimators": 10,
                        "max_features": None,
                        "min_samples_split": 2,
                        "min_samples_leaf": 1,
                        "isMultiWeek": True
                    }
                 )

estimators.append(
                    {
                        "modelClass": "RandomForrest",
                        "n_estimators": 10,
                        "max_features": 100,
                        "min_samples_split": 2,
                        "min_samples_leaf": 1,
                        "isMultiWeek": False
                    }
                 )

estimators.append(
                    {
                        "modelClass": "XGB",
                        "n_estimators": 10,
                        "alpha": 0.001,
                        "lambda": 0.01,
                        "isMultiWeek": False
                    }
                 )
estimators.append(
                    {
                        "modelClass": "LGBM",
                        "n_estimators": 10,
                        "alpha": 0.001,
                        "lambda": 0.01,
                        "isMultiWeek": False
                    }
                )

estimators.append(
            {
              "modelClass": "KernelRidge",
              "alpha": 10,
              "kernel": "linear",
              "isMultiWeek": True
            })

estimators.append(
            {
              "modelClass": "KernelRidge",
              "alpha": 10,
              "kernel": "linear",
              "isMultiWeek": False
            })




estimatorId = 597
'''    

# Hyper-parameters that are copied verbatim from the estimator configuration into
# a result row, per model class. The tuple order is the column order of the row.
# "Keras" is handled separately because it also records derived values.
_resultParameterKeys = {
    "RandomForrest": ("n_estimators", "max_features", "min_samples_split", "min_samples_leaf"),
    "XGB": ("n_estimators", "alpha", "lambda"),
    "LGBM": ("n_estimators", "alpha", "lambda"),
    "KernelRidge": ("alpha", "kernel"),
    "Ridge": ("alpha",),
    "Lasso": ("alpha",),
    "MultiTaskLasso": ("alpha",),
    "SGD": ("alpha", "learning_rate", "penalty"),
    "SVR": ("C",),
}


def _buildResultRow(estimatorConfig, task, weekNumber, rmse1, rmse2, numberOfRanEpochs):
    """Build the one-row result DataFrame for a single (task, week) evaluation.

    The row starts with the common columns (modelClass, task, week, both RMSE
    values, isMultiWeek) followed by the hyper-parameters of the model class.

    Raises:
        ValueError: if the model class of estimatorConfig is not known.
    """
    modelClass = estimatorConfig["modelClass"]
    # every value is wrapped in a one-element list so that list-valued settings
    # (e.g. hiddenLayers) end up in a single cell instead of being expanded
    row = {'modelClass': [modelClass],
           'task': [task],
           'week': [weekNumber],
           'model rmse 1': [rmse1],
           'model rmse 2': [rmse2],
           'isMultiWeek': [estimatorConfig["isMultiWeek"]]}
    if modelClass == "Keras":
        row["hiddenLayers"] = [estimatorConfig["hiddenLayers"]]
        row["numberOfhiddenLayers"] = [len(estimatorConfig["hiddenLayers"])]
        for key in ("isTwoWay", "l1reg", "alpha", "dropoutValue", "learningRate"):
            row[key] = [estimatorConfig[key]]
        row["numberOfRanEpochs"] = [numberOfRanEpochs]
    elif modelClass in _resultParameterKeys:
        for key in _resultParameterKeys[modelClass]:
            row[key] = [estimatorConfig[key]]
    else:
        raise ValueError('Tried to save results for an unsupported estimator')
    return pd.DataFrame(data=row)


# evaluates a fitted estimator on both validation sets and returns the result rows
def constructResults(estimator, task, weekNumber, numberOfRanEpochs):
    """Score a fitted estimator on both validation sets.

    Reads the module-level names estimators, estimatorId, X_valid1, X_valid2,
    validation1_labels, validation2_labels and numberOfOutputWeeks. Nothing is
    written to disk here; the caller collects the returned rows.

    Both the multi-week and the single-week case share one table of recorded
    hyper-parameters, so every model class known to that table can be reported
    in either case.

    Args:
        estimator: fitted model exposing predict().
        task: output category name; label columns are "output_<task>_<week>".
        weekNumber: output week the estimator predicts, or -1 if it predicts
            all numberOfOutputWeeks weeks at once.
        numberOfRanEpochs: number of epochs that were run; only recorded for
            the "Keras" model class.

    Returns:
        DataFrame with one row per evaluated week, holding the RMSE on
        validation set 1 and 2 together with the estimator's hyper-parameters.

    Raises:
        ValueError: if the configured model class is not supported.
    """
    estimatorConfig = estimators[estimatorId]
    if weekNumber == -1: # we make predictions for all weeks
        weeks = list(range(0, numberOfOutputWeeks))
    else: # we make predictions only for one week
        weeks = [weekNumber]
    predictionColumns = ["pred_week_"+task+"_"+str(week) for week in weeks]
    labelColumns = ["output_"+task+"_"+str(week) for week in weeks]

    # predictions, aligned with the index of the validation labels
    predictions1 = pd.DataFrame(estimator.predict(X_valid1), index=validation1_labels.index, columns=predictionColumns)
    predictions2 = pd.DataFrame(estimator.predict(X_valid2), index=validation2_labels.index, columns=predictionColumns)

    # ground truth for the same weeks
    y_valid1 = validation1_labels[labelColumns]
    y_valid2 = validation2_labels[labelColumns]

    rows = []
    for week, predictionColumn, labelColumn in zip(weeks, predictionColumns, labelColumns):
        # root mean squared error per validation set
        rmse1 = np.sqrt(mean_squared_error(y_valid1[labelColumn], predictions1[predictionColumn]))
        rmse2 = np.sqrt(mean_squared_error(y_valid2[labelColumn], predictions2[predictionColumn]))
        rows.append(_buildResultRow(estimatorConfig, task, week, rmse1, rmse2, numberOfRanEpochs))

    if not rows:
        # pd.concat refuses an empty list; no weeks means no result rows
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0, so the rows are concatenated once
    return pd.concat(rows, ignore_index=True)

        
    
    

# training

# model classes the multi-week path (one model per task, all weeks at once) can fit
multiWeekModelClasses = ("Keras", "RandomForrest", "KernelRidge", "Ridge", "Lasso", "SGD", "MultiTaskLasso")
# model classes the single-week path (one model per task and output week) can fit
singleWeekModelClasses = ("Keras", "RandomForrest", "KernelRidge", "Ridge", "Lasso", "LGBM", "XGB", "SGD", "SVR")
# single-week model classes that are fitted on a flattened (1-d) label vector
flatLabelModelClasses = ("RandomForrest", "LGBM", "XGB", "SGD", "SVR")
# tasks trained on the single-week path
# NOTE(review): this is only a subset of outputCategories, whereas the multi-week
# path iterates over all of outputCategories - confirm that this is intended.
singleWeekTasks = ['transit_stations_percent_change_from_baseline_7dayAverage',
                   'workplaces_percent_change_from_baseline_7dayAverage']


def _buildEstimator(estimatorConfig):
    """Create the unfitted non-Keras regressor described by estimatorConfig.

    Raises:
        ValueError: if the model class is not one of the known non-Keras classes.
    """
    modelClass = estimatorConfig["modelClass"]
    if modelClass == "RandomForrest":
        return RandomForestRegressor(n_estimators=estimatorConfig["n_estimators"],
                                     max_features=estimatorConfig["max_features"],
                                     min_samples_split=estimatorConfig["min_samples_split"],
                                     min_samples_leaf=estimatorConfig["min_samples_leaf"],
                                     n_jobs=-1)
    if modelClass == "KernelRidge":
        return KernelRidge(alpha=estimatorConfig["alpha"],
                           kernel=estimatorConfig["kernel"])
    if modelClass == "Ridge":
        return linear_model.Ridge(alpha=estimatorConfig["alpha"])
    if modelClass == "Lasso":
        return linear_model.Lasso(alpha=estimatorConfig["alpha"])
    if modelClass == "MultiTaskLasso":
        return linear_model.MultiTaskLasso(alpha=estimatorConfig["alpha"])
    if modelClass == "SGD":
        return linear_model.SGDRegressor(learning_rate=estimatorConfig["learning_rate"],
                                         penalty=estimatorConfig["penalty"],
                                         alpha=estimatorConfig["alpha"],
                                         max_iter=10000,
                                         tol=0.00001,
                                         n_iter_no_change=10)
    if modelClass == "LGBM":
        return lightgbm.LGBMRegressor(n_estimators=estimatorConfig["n_estimators"],
                                      reg_alpha=estimatorConfig["alpha"],
                                      reg_lambda=estimatorConfig["lambda"],
                                      n_jobs=-1)
    if modelClass == "XGB":
        return xgb.XGBRegressor(n_estimators=estimatorConfig["n_estimators"],
                                reg_alpha=estimatorConfig["alpha"],
                                reg_lambda=estimatorConfig["lambda"],
                                n_jobs=-1)
    if modelClass == "SVR":
        return svm.SVR(C=estimatorConfig["C"])
    raise ValueError('Tried to fit an unsupported estimator')


def _fitKerasEstimator(estimatorConfig, y_train, y_valid):
    """Build and fit a Keras model; return it together with the number of epochs run.

    Training stops early once the validation loss has not improved for 10
    epochs; X_valid1And2 / y_valid serve as the validation data for that.
    """
    estimator = genericKerasModel(
        estimatorConfig["learningRate"],
        estimatorConfig["hiddenLayers"],
        estimatorConfig["dropoutValue"],
        estimatorConfig["l1reg"],
        estimatorConfig["alpha"],
        estimatorConfig["isMultiWeek"]
    )
    history = estimator.fit(X_train,
                            y_train,
                            batch_size=32,
                            epochs=1000,
                            verbose=0,
                            validation_data=(X_valid1And2, y_valid),
                            callbacks=[keras.callbacks.EarlyStopping(patience=10)])
    return estimator, len(history.history['loss'])


def _trainAndEvaluate(estimatorConfig, task, weekNumber):
    """Fit one model for task and return its validation result rows.

    Args:
        estimatorConfig: configuration dictionary of the estimator to train.
        task: output category to predict.
        weekNumber: output week to predict, or -1 to predict all
            numberOfOutputWeeks weeks with one model.

    Raises:
        ValueError: if the model class is not supported on the chosen path.
    """
    isMultiWeek = weekNumber == -1
    weeks = range(0, numberOfOutputWeeks) if isMultiWeek else [weekNumber]
    labelColumns = ["output_"+task+"_"+str(week) for week in weeks]
    y_train = train_labels[labelColumns].values

    modelClass = estimatorConfig["modelClass"]
    supportedModelClasses = multiWeekModelClasses if isMultiWeek else singleWeekModelClasses
    if modelClass not in supportedModelClasses:
        raise ValueError('Tried to fit an unsupported estimator')

    if modelClass == "Keras":
        # validation labels for the same weeks (used for early stopping)
        y_valid = validation1And2_labels[labelColumns].values
        estimator, numberOfRanEpochs = _fitKerasEstimator(estimatorConfig, y_train, y_valid)
        if isMultiWeek:
            # exist_ok avoids a crash when a parallel run creates the folder first
            os.makedirs('models', exist_ok=True)
            estimator.save("models/"+str(estimatorId)+"_"+task)
        # NOTE(review): single-week Keras models are not saved - confirm intended.
        return constructResults(estimator, task, weekNumber, numberOfRanEpochs)

    estimator = _buildEstimator(estimatorConfig)
    if not isMultiWeek and modelClass in flatLabelModelClasses:
        # these regressors are fitted on a 1-d target vector
        y_train = y_train.ravel()
    estimator.fit(X_train, y_train)
    return constructResults(estimator, task, weekNumber, -1)


estimatorConfig = estimators[estimatorId]
if estimatorConfig["isMultiWeek"]:
    # we just train one model per task
    for task in outputCategories:
        # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
        results = pd.concat([results, _trainAndEvaluate(estimatorConfig, task, -1)], ignore_index=True)
else:
    # we have to train one model per output week and per task
    for outputWeekNumber in range(0, numberOfOutputWeeks):
        for task in singleWeekTasks:
            results = pd.concat([results, _trainAndEvaluate(estimatorConfig, task, outputWeekNumber)], ignore_index=True)

def generateModelId(dictionary):
    """Flatten a configuration dictionary into one "key=value/key=value" string.

    Entries appear in the dictionary's iteration order, every value is passed
    through str(), and the pieces are separated by "/" without a trailing
    separator. An empty dictionary gives an empty string. Keys must be strings.
    """
    return "/".join(key + "=" + str(value) for key, value in dictionary.items())

# tag every result row with the configuration string and the index of the estimator
results["modelId"] = generateModelId(estimators[estimatorId])
results["modelIdNumber"] = estimatorId

# write the rows to results/<estimatorId>.csv; exist_ok avoids the crash that a
# separate exists()-check allows when another run creates the folder in between
os.makedirs('results', exist_ok=True)
results.to_csv("results/"+str(estimatorId)+".csv", header=True, index=False)

Start Keras: 597
Keras: 19584
Total: 20181
INFO:tensorflow:Assets written to: models/597_case_inz_entries_7dayAverage/assets
INFO:tensorflow:Assets written to: models/597_hosp_inz_entries_7dayAverage/assets
INFO:tensorflow:Assets written to: models/597_death_inz_entries_7dayAverage/assets
INFO:tensorflow:Assets written to: models/597_testPositvity_7dayAverage/assets
INFO:tensorflow:Assets written to: models/597_transit_stations_percent_change_from_baseline_7dayAverage/assets
INFO:tensorflow:Assets written to: models/597_workplaces_percent_change_from_baseline_7dayAverage/assets


In [None]:
# Disabled snippet, kept as a string literal so that none of it executes.
# It reads 'output.log', converts characters 32..-11 of every line to an int
# (presumably the id of an estimator that failed - confirm against the log
# format), writes those ids to 'erstimatorErrors.txt', displays how many were
# found and shows the configuration of the first 125 of them.
# NOTE(review): the slice line[32:-11] depends on a log line layout that is not
# visible here, and file1 is never closed - check both before re-enabling.
'''
file1 = open('output.log', 'r')
Lines = file1.readlines()
 


estimatorErrors = []

for line in Lines:
    estimatorErrors.append(int(line[32:-11]))

   

with open('erstimatorErrors.txt', 'w') as f:
    for e in estimatorErrors:
        f.write(str(e)+"\n")
display(len(estimatorErrors))


for e in estimatorErrors[0:125]:
    display(str(e)+": "+str(estimators[e]))
  
'''
