In [1]:
import numpy as np 
import pandas as pd 

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/m5-forecasting-uncertainty/calendar.csv
/kaggle/input/m5-forecasting-uncertainty/sample_submission.csv
/kaggle/input/m5-forecasting-uncertainty/sell_prices.csv
/kaggle/input/m5-forecasting-uncertainty/sales_train_validation.csv
/kaggle/input/m5-forecasting-uncertainty/sales_train_evaluation.csv
/kaggle/input/m5-forecasting-accuracy/calendar.csv
/kaggle/input/m5-forecasting-accuracy/sample_submission.csv
/kaggle/input/m5-forecasting-accuracy/sell_prices.csv
/kaggle/input/m5-forecasting-accuracy/sales_train_validation.csv
/kaggle/input/m5-forecasting-accuracy/sales_train_evaluation.csv


In [2]:
df_calendar = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/calendar.csv')
df_sell_prices = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/calendar.csv')
df_train = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sales_train_validation.csv')
df_test = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sales_train_evaluation.csv')

In [4]:
from abc import ABC, abstractmethod

class Forecaster(ABC):
    """
    Base predictor class. Must have a model object, a fit function and a predict function
    """
    def __init__(self, model, **kwargs):
        self.model = model
    @abstractmethod
    def fit(self):
        pass
    @abstractmethod
    def predict(self):
        pass

In [5]:
df_train.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4


In [6]:
df_calendar.head()

Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI
0,2011-01-29,11101,Saturday,1,1,2011,d_1,,,,,0,0,0
1,2011-01-30,11101,Sunday,2,1,2011,d_2,,,,,0,0,0
2,2011-01-31,11101,Monday,3,1,2011,d_3,,,,,0,0,0
3,2011-02-01,11101,Tuesday,4,2,2011,d_4,,,,,1,1,0
4,2011-02-02,11101,Wednesday,5,2,2011,d_5,,,,,1,0,1


In [7]:
testx = [1,2,3,4,5]

# Naive

$\hat{Y}_{n+i} = Y_{n}, i = 1,2,...,h$

In [8]:
def naive_model_fn(inputs: list, horizon:int=28, forecast_type:str=None):
    value = inputs[-7:] if forecast_type == "seasonal" else inputs[-1:]
    forecast = value * horizon
    forecast = forecast[-horizon:]
    assert len(forecast) == horizon
    return forecast

class NaiveForecaster(Forecaster):
    def __init__(self):
        super().__init__(naive_model_fn)
    def fit(self, inputs, horizon, forecast_type):
        return self.model
    def predict(self, inputs, horizon, forecast_type):
        return self.model(inputs, horizon, forecast_type)

In [9]:
naive_forecaster = NaiveForecaster()
pred_naive = naive_forecaster.predict(testx, 28, None)
pred_s_naive = naive_forecaster.predict(testx, 28, "seasonal")

print(pred_naive)
print(pred_s_naive)

[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
[3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5]


# Simple Exponential Smoothing

$\hat{Y}_{t+1} = \alpha Y_{t} + (1-\alpha)\hat{Y}_{t}$ where alpha is in the range [0.1,0.3]

In [73]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

def ses_fn(x: list, alpha:float, horizon=int):
    def _update_forecast(prev_actual, prev_forecast, alpha):
        return alpha * prev_actual + (1-alpha) * prev_forecast
    for idx, val in enumerate(x):
        if idx == 0:
            forecast = val
            prev_actual = val
        else:
            forecast = _update_forecast(prev_actual, forecast, alpha)
            prev_actual = val
    forecast = _update_forecast(prev_actual, forecast, alpha)
    return [forecast] * horizon

class SimpleExponentialForecaster(Forecaster):
    def __init__(self):
        super().__init__(ses_fn)
    def fit(self, alpha, optimized):
        return self.model
    def predict(self, x, alpha, horizon):
        return self.model(x, alpha, horizon)

In [74]:
print(testx)
ses_forecaster = SimpleExponentialForecaster()
pred_ses = ses_forecaster.predict(testx, alpha=0.1, horizon=28)
print(pred_ses)

[1, 2, 3, 4, 5]
[1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049, 1.9049]


# Moving Averages

$\hat{Y}_t = \frac{\sum^{k}_{i=1} Y_{t-i}}{k}$ where K is selected from the range [2,5]

In [83]:
def moving_averages_fn(x: list, k:int, horizon:int=28):
    forecast = np.mean(x[-k:])
    return [forecast] * horizon

class MovingAverageForecaster(Forecaster):
    def __init__(self):
        super().__init__(moving_averages_fn)
    def fit(self):
        return self.model
    def predict(self, inputs, k):
        return self.model(inputs, k)

In [84]:
print(testx)
movavg_forecaster = MovingAverageForecaster()
pred_movavg = movavg_forecaster.predict(testx, k=2)
print(pred_movavg)

[1, 2, 3, 4, 5]
[4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5]


# Croston's Method (CRO)
$\hat{Y}_t = \frac{\hat{z}_t}{\hat{p}_t}$

Where:
- $\hat{z}_t$ represents the non-zero demand size
- $\hat{p}_t$ represents the inter-demand intervals
- $\hat{z}_t$ and $\hat{p}_t$ are predicted using simple exponential smoothing and smoothing parameters of both components are set to 0.1 and the first observation of the components are used for initialisations.

In [None]:
croston_forecaster = CrostonForecaster()
pred_croston = croston_forecaster.predict(testx, threshold=3, horizon=28)
print(pred_croston)

In [None]:

y = np.array([0,0,2,7,0,0,0,-5])
n_timepoints = len(y)
smoothing = 0.1
first_occurrence = np.argmax(y[:n_timepoints] > 0)

q, a, f = np.full((3, n_timepoints + 1), np.nan)
p = 1  # periods since last demand observation

q[0] = y[first_occurrence]
a[0] = 1 + first_occurrence
f[0] = q[0] / a[0]

for t in range(0, n_timepoints):
    if y[t] > 0:
        q[t + 1] = smoothing * y[t] + (1 - smoothing) * q[t]
        a[t + 1] = smoothing * p + (1 - smoothing) * a[t]
        f[t + 1] = q[t + 1] / a[t + 1]
        p = 1
    else:
        q[t + 1] = q[t]
        a[t + 1] = a[t]
        f[t + 1] = f[t]
        p += 1

print(first_occurrence)
print(y)
print(q)
print(a)
print(f)

# Aggregate-Disaggregate Intermittend Demand Approach (ADIDA)
