In [34]:
import numpy as np  # 1.26.4
import pandas as pd  # 2.2.0
from sklearn.ensemble import RandomForestRegressor  # 1.4.1
from sklearn.metrics import mean_absolute_error

In [42]:
# Build the modelling frame in one chain: read the melted dataset with its
# first column as the index, turn that index into datetimes, then discard the
# columns that are not used further on.
df = (
    pd.read_csv("./output/df_melt_total.csv", index_col=0)
    .pipe(
        lambda frame: frame.set_axis(
            pd.to_datetime(frame.index, format="%Y-%m-%d"),
            axis="index",
        )
    )
    .drop(columns=["Weekday_label", "Lockdown", "Health_pass", "Vaccine_pass"])
)

# Keep a copy of the reduced dataset on disk.
df.to_csv("visits.csv")

In [36]:
# Experiment configuration.
# The training window is [df_start, df_split) and the test window is
# [df_split, df_stop): lower bounds are inclusive, upper bounds exclusive.
df_start = "2022-01-13"
df_split = "2023-11-19"
df_stop = "2023-12-03"
target = "Visits"

# Model hyper-parameters
n_estimators = 100
max_depth = 10
# Fixed seed: without it every run grows a different forest, so the feature
# importances, predictions and MAE could not be reproduced.
random_state = 42

# Train and test datasets creation
# Features are selected by name (everything except the target) instead of by
# position, so the target can never end up in X if the column order changes.
X = df.drop(columns=[target])
y = df[[target]]

# X and y share df's index, so one boolean mask per window serves both.
train_mask = (df.index >= df_start) & (df.index < df_split)
test_mask = (df.index >= df_split) & (df.index < df_stop)

X_train = X.loc[train_mask, :]
y_train = y.loc[train_mask, :]

X_test = X.loc[test_mask, :]
y_test = y.loc[test_mask, :]

# Fail early, with an explicit message, if a window selects no rows.
if X_train.empty or X_test.empty:
    raise ValueError(
        f"Empty train or test window: {len(X_train)} training rows in "
        f"[{df_start}, {df_split}) and {len(X_test)} test rows in "
        f"[{df_split}, {df_stop})."
    )

# Model creation and training
rand_for = RandomForestRegressor(n_estimators=n_estimators,
                                 max_depth=max_depth,
                                 random_state=random_state)

# y_train is a single-column frame; flatten it to a 1-D array for fitting.
rand_for.fit(X_train, y_train.to_numpy().ravel())

# Features' importance extraction, sorted from most to least important.
# NOTE: despite the column name, the values are fractions (they sum to 1),
# not percentages in the 0-100 range.
feat_imp = pd.DataFrame(data=rand_for.feature_importances_,
                        index=X.columns,
                        columns=["Percentage"]).sort_values(by="Percentage", ascending=False)

# Predictions on test dataset, rounded to the nearest whole number
y_pred = pd.DataFrame(data=np.round(rand_for.predict(X_test)),
                      index=X_test.index,
                      columns=[target])

y_pred.to_csv("y_pred.csv")

# MAE calculation (computed against the rounded predictions)
mae = mean_absolute_error(y_test, y_pred)


In [39]:
# Mean absolute error between the test targets and the rounded predictions.
mae

11.285714285714286

In [40]:
# Feature importances of the fitted forest, sorted in descending order.
feat_imp

Unnamed: 0,Percentage
Temp_moy,0.300289
Opening_Time,0.21906
Weekday,0.206404
Month,0.095841
National_holidays,0.054863
Vent_moy,0.043445
DayofMonth,0.035225
Pluie_moy,0.021005
School_Holidays,0.018523
Year,0.005346


In [41]:
# Rounded predictions for the test window, indexed like X_test.
y_pred

Unnamed: 0_level_0,Visits
Date,Unnamed: 1_level_1
2023-11-19,125.0
2023-11-20,29.0
2023-11-21,62.0
2023-11-22,93.0
2023-11-23,68.0
2023-11-24,72.0
2023-11-25,125.0
2023-11-26,120.0
2023-11-27,29.0
2023-11-28,69.0
