# ELP EU Orders

#### Imports

In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

from nv_forecasting.configs import DEFAULT_CONFIG as config
from nv_forecasting.load_data.elp_eu_orders_daily import ELPEUOrdersDaily
from nv_forecasting.style import set_style
from nv_forecasting.feature_engineering.add_time_features import add_time_features
from nv_forecasting.feature_engineering.add_lags import add_lags
from nv_forecasting.plots import plot_cross_validation_results
from nv_forecasting.metrics import get_scores, add_scores_to_dict
from nv_forecasting.model_wrappers.mean_forecaster import MeanForecaster
from nv_forecasting.model_wrappers.random_forest_gs import RandomForestGS
from nv_forecasting.cross_validation.cross_validate import cross_validate

#### Settings

In [None]:
# Every setting below is looked up in the shared default config, in the same
# order as the names on the left-hand side of each unpacking.

# Column roles and aggregation level
(
    TARGET,  # Target column name
    DATETIME_COLUMN_NAME,
    AGGREGATION,
    COLUMNS_TO_LAG,
    COLUMNS_TO_DROP,
) = map(
    config.get,
    ('target', 'datetime_column_name', 'aggregation', 'columns_to_lag', 'columns_to_drop'),
)

# Feature engineering
LAGS = config.get('lags')

# Model training
(
    N_OUTER_SPLITS,
    N_INNER_SPLITS,
    N_FINAL_SPLITS,
    TEST_SIZE,
    SCORING,
) = map(
    config.get,
    ('n_outer_splits', 'n_inner_splits', 'n_final_splits', 'test_size', 'scoring'),
)

# Data splits
outer_cv, inner_cv, final_cv = map(config.get, ('outer_cv', 'inner_cv', 'final_cv'))

# Style (applied once, after all settings are read)
set_style()

#### Loading the data

In [None]:
# Build the CSV path with the platform's separator. The previous hard-coded
# backslash only resolved on Windows; on POSIX it named a single file whose
# name contained a literal backslash. On Windows the resulting string is
# unchanged.
data_path = os.path.join('data', 'elp_eu_orders_daily.csv')

# Load the daily orders and request them at the configured aggregation.
data_handler = ELPEUOrdersDaily(data_path)
df = data_handler.get_dataframe(agg=AGGREGATION)

#### Feature engineering

In [None]:
# Both helpers are called only for their effect on `df`; their return values
# are not used.
# NOTE(review): this assumes they add the new columns to `df` in place -- confirm.
add_time_features(df)
add_lags(df, columns=COLUMNS_TO_LAG, lags=LAGS)

# Remove every row that still contains a missing value.
df.dropna(inplace=True)

# The target is kept as a single-column DataFrame; the feature matrix is
# whatever remains after removing the configured columns.
y = df[[TARGET]]
X = df.drop(COLUMNS_TO_DROP, axis='columns')

#### Choice of models

In [None]:
# Random forest candidate. The grid holds 5 * 3 * 4 = 60 parameter combinations.
# NOTE(review): presumably RandomForestGS runs a grid search over `param_grid` -- confirm.
rf = RandomForestGS(
    param_grid=dict(
        n_estimators=[100, 200, 500, 1000, 2000],
        max_depth=[5, 10, None],
        min_samples_split=[2, 3, 4, 5],
    ),
)

# Second candidate, configured with the test-window size as `last_days`.
# NOTE(review): presumably a baseline that averages the last `last_days` observations -- confirm.
mean_forecaster = MeanForecaster(last_days=TEST_SIZE)

In [None]:
# Candidate models handed to cross_validate together.
models_list = [
    rf,
    mean_forecaster,
]

#### Hyperparameter tuning (time series nested cross-validation)

In [None]:
# Run cross-validation for every candidate model using the configured outer
# and inner splitters, and additionally request the 'ME' aggregation.
cv_summary = cross_validate(
    models_list,
    outer_cv,
    inner_cv,
    X,
    y,
    TARGET,
    SCORING,
    additional_aggregations=['ME'],
)

#### Plotting cross-validation results

In [None]:
# Plot the cross-validation summary against the target series.
# NOTE(review): the model names are hard-coded here -- presumably they must
# match the entries of models_list as recorded in cv_summary; confirm.
plot_cross_validation_results(
    y,
    TARGET,
    outer_cv,
    cv_summary,
    models_names_list=['random_forest_gs', 'mean_forecaster'],
    linewidth=0.5,
)