In [3]:
import numpy as np
import pandas as pd

# Notebook-wide pandas settings.
pd.set_option('mode.chained_assignment', None)  # turn off the chained-assignment check
pd.set_option('display.float_format', '{:,.2f}'.format)  # thousands separator, 2 decimals

# Folder containing the input file; a relative path, so it depends on the working directory.
path = 'data/'

# NOTE(review): unpickling can execute arbitrary code; only load a file you trust.
data = pd.read_pickle(f"{path}data.pkl")

In [4]:
# Build one indicator (dummy) frame per column; each is encoded on its own
# single-column frame so the resulting columns keep the source column as prefix.
# NOTE(review): by default get_dummies only encodes object/string/category
# columns; confirm these three are not stored as plain numbers.
moisCol, jourCol, jourFerieTypeCol = (
    pd.get_dummies(data[[column]])
    for column in ('Mois', 'Jour', 'JourFerieType')
)


In [5]:
# Swap the three raw columns for their indicator frames, joined side by side.
data_with_dummies = pd.concat(
    [
        data.drop(['Mois', 'Jour', 'JourFerieType'], axis='columns'),
        moisCol,
        jourCol,
        jourFerieTypeCol,
    ],
    axis='columns',
)

In [6]:
from sklearn.model_selection import train_test_split
# Features are every column except 'DateTime' and the target 'Consommation'.
# Half of the rows go to each side; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    data_with_dummies.drop(['DateTime', 'Consommation'], axis='columns'),
    data_with_dummies['Consommation'],
    test_size=0.5,
    random_state=999,
)

In [7]:
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import root_mean_squared_error as rmse

In [9]:
# Baseline: predict the training-set mean for every observation,
# on both the training and the test targets.
predict_naive_train = np.full(len(Y_train), Y_train.values.mean())
predict_naive_test = np.full(len(Y_test), Y_train.values.mean())

# MAPE is scaled by 100 so it prints as a percentage.
mape_train = 100 * mape(Y_train, predict_naive_train)
rmse_train = rmse(Y_train, predict_naive_train)

mape_test = 100 * mape(Y_test, predict_naive_test)
rmse_test = rmse(Y_test, predict_naive_test)

print('On train set\n')
print(f'The MAPE in naive prediction with mean is {mape_train:.2f}%.')
print(f'The RMSE in naive prediction with mean is {rmse_train:.2f}.')
print('\nOn test set\n')
print(f'The MAPE in naive prediction with mean is {mape_test:.2f}%.')
print(f'The RMSE in naive prediction with mean is {rmse_test:.2f}.')

On train set

The MAPE in naive prediction with mean is 18.80%.
The RMSE in naive prediction with mean is 11760.80.

On test set

The MAPE in naive prediction with mean is 18.88%.
The RMSE in naive prediction with mean is 11807.62.


In [12]:
import time as tm

def fit_and_predict_error(model, x_train, y_train, x_test, y_test):
    """Fit ``model``, predict on both sets, print test errors, return all metrics.

    Parameters
    ----------
    model
        Object exposing ``fit(x, y)`` and ``predict(x)``.
    x_train, y_train
        Data passed to ``model.fit`` and used for the train metrics.
    x_test, y_test
        Data used for the test metrics (the only ones printed).

    Returns
    -------
    dict
        ``'train'`` / ``'test'``: predictions on each set;
        ``'mape_train'`` / ``'mape_test'``: result of ``mape`` multiplied by 100;
        ``'rmse_train'`` / ``'rmse_test'``: result of ``rmse``;
        ``'time'``: elapsed seconds spent in ``fit`` plus both ``predict`` calls.
    """
    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    start_time = tm.perf_counter()
    model.fit(x_train, y_train)
    predict_train = model.predict(x_train)
    predict_test = model.predict(x_test)
    elapsed = tm.perf_counter() - start_time

    mape_test = mape(y_test, predict_test)*100
    rmse_test = rmse(y_test, predict_test)

    print('\nOn test set\n')
    print(f'The MAPE is {mape_test:.2f}%.')
    print(f'The RMSE is {rmse_test:.2f}.')

    return {'train' : predict_train, 'test' : predict_test,
            'mape_train' : mape(y_train, predict_train)*100,
            'mape_test' : mape_test,
            'rmse_train' : rmse(y_train, predict_train),
            'rmse_test' : rmse_test,
            'time' : elapsed}

In [13]:
def add_error(model_out, model_name, df):
    """Return a new frame equal to ``df`` plus one row of metrics for a model.

    Parameters
    ----------
    model_out : mapping
        Must provide the keys ``'mape_test'``, ``'rmse_test'``,
        ``'mape_train'``, ``'rmse_train'`` and ``'time'``.
    model_name
        Value stored in the ``'Model'`` column of the new row.
    df : pandas.DataFrame
        Existing results table; it is not modified.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the new row appended and the index renumbered from 0.
    """
    new_row = {'Model' : model_name,
               'MAPE test' : model_out['mape_test'],
               'RMSE test' : model_out['rmse_test'],
               'MAPE train' : model_out['mape_train'],
               'RMSE train' : model_out['rmse_train'],
               'CPU time' : model_out['time']}
    # Use the public pd.concat rather than the private DataFrame._append
    # (the public DataFrame.append was removed in pandas 2.0).
    return pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

In [14]:
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Columns kept in the reduced ("S") design matrices.
features = [
    'Temperature', 'Nebulosity', 'Humidity', 'WindSpeed', 'Precipitation',
    'PositionDansAnnee', 'DemiHeure', 'JourFerie',  'Vacances', 'MJour',
    'Annee', 'is.2020',
]

# "S" variants hold only the columns listed above;
# "L" variants are the full train/test frames under another name (no copy).
X_train_S, X_test_S = X_train[features], X_test[features]
X_train_L, X_test_L = X_train, X_test