In [52]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_val_score

In [61]:
ModelsDict={}
ModelsDict['linreg'] = LinearRegression()
ModelsDict['RF'] = RandomForestRegressor(n_estimators=150, max_depth=15)
ModelsDict['MLP'] = MLPRegressor(solver = 'sgd', max_iter=1500)

In [68]:
TargetList=['Max_temp', 'Min_temp', 'Mean_temp'] #possible targets of prediction
FeatureList=['allfeatures', 'onlytempfeatures', 'wind_rainfeatures'] #possible features to use for prediction

In [96]:
ModelScores={}
def RunModel(model, daynum, f, temp):
    df = pd.read_csv(f'./preprocessed/{daynum}daysprev_{f}_{temp}.csv')
    todrop=['Unnamed: 0']
    df = df.drop(todrop, axis = 1)
    inputlist = df.columns.tolist()
    target = inputlist[-1]
    del inputlist[-1] # deleting target from input variables
    inputvar = df[inputlist]
    tar = df[target]
    #print(cross_val_score(model, inputvar, tar, scoring='neg_mean_absolute_error', cv=10).mean())
    
    ModelScores[f'Error of {temp} predicted by {model} from {f} using {daynum} days before'] = cross_val_score(model, inputvar, tar, scoring='neg_mean_absolute_error', cv=10).mean()


Sample usage of RunModel: RunModel(model = ML model to run, daynum= nr of days before target to take into account, f = what features to consider (all/onlytemp/wind_rain, see in FeatureList), temp = what temperature we want to predict (min, max or mean, see TargetList for potential values))


output: average of 10-fold cross validation of mean absolute errors stored in a dictionary called ModelScores

In [97]:
for model in ModelsDict.values():
    RunModel(model, 3, 'allfeatures', 'Mean_temp')

In [98]:
ModelScores

{'Error of Mean_temp predicted by LinearRegression() from allfeatures using 3 days before': -0.17003125559235294,
 'Error of Mean_temp predicted by RandomForestRegressor(max_depth=15, n_estimators=150) from allfeatures using 3 days before': -0.1796522382745067,
 "Error of Mean_temp predicted by MLPRegressor(max_iter=1500, solver='sgd') from allfeatures using 3 days before": -0.19473838900401425}