In [None]:
import pandas as pd

# https://github.com/vcerqueira/blog/
from src.tde import time_delay_embedding

# Load the wine sales data and use the parsed 'date' column as the row index.
# Chaining set_index (rather than inplace=True) builds the indexed frame in a
# single expression.
wine = pd.read_csv('data/wine_sales.csv', parse_dates=['date']).set_index('date')

# you can simulate some data with the following code
# (it needs `import numpy as np`, which this cell does not import)
# wine = pd.DataFrame(np.random.random((100, 6)),
#            columns=['Fortified','Drywhite','Sweetwhite',
#                      'Red','Rose','Sparkling'])

# create data set with lagged features using time delay embedding:
# one embedded frame per column of `wine`, with 12 lags and a horizon of 6
wine_ds = [
    time_delay_embedding(wine[col], n_lags=12, horizon=6)
    for col in wine
]

# concatenating all variables column-wise and dropping rows with missing values
wine_df = pd.concat(wine_ds, axis=1).dropna()

# defining target (Y) and explanatory variables (X)
# Raw strings keep the regex escapes (\(, \-, \+) intact without relying on
# Python passing unknown string escapes through, which emits a warning on
# recent interpreters.
# NOTE(review): assumes time_delay_embedding names its columns like
# 'Red(t-1)' / 'Sparkling(t+1)' - confirm against src.tde.
predictor_variables = wine_df.columns.str.contains(r'\(t\-')
target_variables = wine_df.columns.str.contains(r'Sparkling\(t\+')

# boolean column masks: every '(t-' column is a predictor, while only the
# 'Sparkling(t+' columns are targets
X = wine_df.loc[:, predictor_variables]
Y = wine_df.loc[:, target_variables]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as mae
from sklearn.ensemble import RandomForestRegressor


# train/test split
# shuffle=False keeps the rows in their existing order: the first 70% of rows
# are used for training and the remaining 30% for testing
X_tr, X_ts, Y_tr, Y_ts = train_test_split(X, Y, test_size=0.3, shuffle=False)

# fitting a RF model
# random_state is fixed so the fitted forest, and therefore the reported
# error, is reproducible on Restart Kernel -> Run All
model = RandomForestRegressor(random_state=42)
model.fit(X_tr, Y_tr)

# getting forecasts for the test set
preds = model.predict(X_ts)

# computing MAE error
print(mae(Y_ts, preds))
# 288.13 was recorded from an earlier, unseeded run; the seeded value may differ

In [None]:
# number of highest-importance features kept for the reduced model
N_TOP_FEATURES = 10

# getting importance scores from previous model, labelled by feature name
importance_scores = pd.Series(model.feature_importances_, index=X_tr.columns)

# getting top features: sort by importance (largest first) and keep the head
top_10_features = importance_scores.sort_values(ascending=False).head(N_TOP_FEATURES)
top_10_features_nm = top_10_features.index

# restrict both splits to the selected columns
X_tr_top = X_tr[top_10_features_nm]
X_ts_top = X_ts[top_10_features_nm]

# re-fitting the model
# random_state is fixed so this reduced model's error is reproducible
model_top_features = RandomForestRegressor(random_state=42)
model_top_features.fit(X_tr_top, Y_tr)

# getting forecasts for the test set
preds_topf = model_top_features.predict(X_ts_top)

# computing MAE error
print(mae(Y_ts, preds_topf))
# 274.36 was recorded from an earlier, unseeded run; the seeded value may differ