# Store Item Demand Forecasting Challenge


In [169]:
import os
import random

import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sktime.split import temporal_train_test_split
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.compose import ForecastingPipeline
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.transformations.series.summarize import WindowSummarizer
from sktime.utils.plotting import plot_series

from src.constants import RANDOM_STATE

In [3]:
np.random.seed(RANDOM_STATE)
os.environ["PYTHONHASHSEED"] = str(RANDOM_STATE)
random.seed(RANDOM_STATE)

## Load data

In [116]:
data_raw = pd.read_csv("../data/train.csv")
data = data_raw.copy()

In [117]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 913000 entries, 0 to 912999
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   date    913000 non-null  object
 1   store   913000 non-null  int64 
 2   item    913000 non-null  int64 
 3   sales   913000 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 27.9+ MB


In [118]:
data["date"] = pd.to_datetime(data.date).dt.to_period(freq="D")
data = data.set_index(["store", "item", "date"])

In [119]:
y = data

In [206]:
y.sample(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sales
store,item,date,Unnamed: 3_level_1
9,16,2015-11-28,40
5,10,2013-11-04,43
3,30,2013-09-06,39


In [121]:
y_train, y_test = temporal_train_test_split(y, test_size=0.3)

In [203]:
fh_range = pd.date_range(y_test.index.get_level_values(-1).min().to_timestamp(), y_test.index.get_level_values(-1).max().to_timestamp(), freq="D").to_period()
fh = ForecastingHorizon(fh_range, is_relative=False, freq="D")

## Modeling

In [125]:
kwargs = {
    "lag_feature": {
        "lag": [1],
        "mean": [[1, 3], [3, 6]],
        "std": [[1, 4]],
    }
}

In [135]:
regressor = RandomForestRegressor(n_jobs=-1, random_state=RANDOM_STATE)
forecaster = make_reduction(
    regressor,
    strategy="recursive",
    window_length=12,
)

In [136]:
pipe = ForecastingPipeline(
    steps=[
        ("pre-processor", WindowSummarizer(**kwargs)),
        ("forecaster", forecaster),
    ]
)

In [137]:
forecaster.fit(y_train)

In [204]:
y_pred = forecaster.predict(fh=fh)
smape = MeanAbsolutePercentageError()
smape(y_test, y_pred)  

0.2669737593616675