In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_val_score

In [2]:
ModelsDict={}
ModelsDict['linreg'] = LinearRegression()
ModelsDict['RF'] = RandomForestRegressor(n_estimators=150, max_depth=15)
ModelsDict['MLP'] = MLPRegressor(solver = 'sgd', max_iter=1500)

In [3]:
TargetList=['Max_temp', 'Min_temp', 'Mean_temp'] #possible targets of prediction
FeatureList=['allfeatures', 'onlytempfeatures', 'wind_rainfeatures'] #possible features to use for prediction

In [37]:
#ModelScores={}

errdf = pd.DataFrame(columns=['model', 'daynum', 'feature', 'target', 'error'])
#errdf.columns = ['model', 'daynum', 'feature', 'target', 'error']

def RunModel(rownum, model, daynum, f, temp):
    df = pd.read_csv(f'./preprocessed/{daynum}daysprev_{f}_{temp}.csv')
    todrop=['Unnamed: 0']
    df = df.drop(todrop, axis = 1)
    inputlist = df.columns.tolist()
    target = inputlist[-1]
    del inputlist[-1] # deleting target from input variables
    inputvar = df[inputlist]
    tar = df[target]
    #print(cross_val_score(model, inputvar, tar, scoring='neg_mean_absolute_error', cv=10).mean())
    
    errval = cross_val_score(model, inputvar, tar, scoring='neg_mean_absolute_error', cv=10).mean()

    errdf.loc[rownum] = pd.Series({'model':str(model), 'daynum':daynum, 'feature':f, 'target':temp, 'error':errval})
    #ModelScores[f'Error of {temp} predicted by {model} from {f} using {daynum} days before'] = cross_val_score(model, inputvar, tar, scoring='neg_mean_absolute_error', cv=10).mean()


Sample usage of RunModel: RunModel(model = ML model to run, daynum= nr of days before target to take into account, f = what features to consider (all/onlytemp/wind_rain, see in FeatureList), temp = what temperature we want to predict (min, max or mean, see TargetList for potential values))


output: average of 10-fold cross validation of mean absolute errors stored in a dictionary called ModelScores

In [40]:
#ModelScores={}

rowcounter = 0

for model in ModelsDict.values():
    for daysprev in range(8):
        for target in TargetList:
            for feature in FeatureList:
                RunModel(rowcounter, model, daysprev, feature, target)
                rowcounter = rowcounter+1
        print(daysprev)
    print(str(model))

'''
for model in ModelsDict.values():
    for daysprev in range(7):
        #for target in TargetList:
        #for feature in FeatureList.values():
        RunModel(model, daysprev, 'allfeatures', 'Max_temp')
'''

0
1
2
3
4
5
6
7
LinearRegression()
0
1
2
3
4
5
6
7
RandomForestRegressor(max_depth=15, n_estimators=150)
0
1
2
3
4
5




6




7
MLPRegressor(max_iter=1500, solver='sgd')


"\nfor model in ModelsDict.values():\n    for daysprev in range(7):\n        #for target in TargetList:\n        #for feature in FeatureList.values():\n        RunModel(model, daysprev, 'allfeatures', 'Max_temp')\n"

In [42]:
errdf.to_csv(f'output/modelscores.csv')

errdf

#ModelScores

Unnamed: 0,model,daynum,feature,target,error
0,LinearRegression(),0,allfeatures,Max_temp,-0.208605
1,LinearRegression(),0,onlytempfeatures,Max_temp,-0.210488
2,LinearRegression(),0,wind_rainfeatures,Max_temp,-0.877428
3,LinearRegression(),0,allfeatures,Min_temp,-0.179761
4,LinearRegression(),0,onlytempfeatures,Min_temp,-0.187652
...,...,...,...,...,...
211,"MLPRegressor(max_iter=1500, solver='sgd')",7,onlytempfeatures,Min_temp,-0.200629
212,"MLPRegressor(max_iter=1500, solver='sgd')",7,wind_rainfeatures,Min_temp,-0.81521
213,"MLPRegressor(max_iter=1500, solver='sgd')",7,allfeatures,Mean_temp,-0.202918
214,"MLPRegressor(max_iter=1500, solver='sgd')",7,onlytempfeatures,Mean_temp,-0.193558
