In [230]:
# imports
import pandas as pd
import numpy as np
pd.options.plotting.backend = 'plotly'

from sktime.forecasting.naive import NaiveForecaster
from sktime.performance_metrics.forecasting import mean_absolute_scaled_error, mean_absolute_error, mean_absolute_percentage_error, mean_squared_error


## read data

In [231]:
df_store = pd.read_pickle('data/df_daily.pkl')


## define functions

In [232]:
def preprocessing(ts):
    '''
    '''
    ts = ts['sales']/1e6
    ts.index.freq = 'D'
    return {'y':ts}

def cross_validation_result(data, model, model_name, rolls=4, horizon=30):
    '''
    '''
    mae_CVs = []
    rmse_CVs = []
    mape_CVs = []
    mase_CVs = []
    for i in range(rolls):
        y_train = data['y'].iloc[:-(rolls-i)*horizon]
        y_test = data['y'].iloc[np.r_[-(rolls-i)*horizon:-(rolls-i-1)*horizon]]
        
        model.fit(y=y_train)
        y_hat = model.predict(fh=list(range(1,horizon+1)))
        
        mae_CVs.append(round(mean_absolute_error(y_test, y_hat), 3))
        rmse_CVs.append(round(mean_squared_error(y_test, y_hat, square_root=True), 3))
        mape_CVs.append(round(mean_absolute_percentage_error(y_test, y_hat), 3))
        mase_CVs.append(round(mean_absolute_scaled_error(y_test, y_hat, y_train=y_train, sp=7), 3))
    
    return {'store':model_name,
           'sales': y_test,
           'mae_SNAIVE':np.mean(mae_CVs),
           'rmse_SNAIVE':np.mean(rmse_CVs),
           'mape_SNAIVE':np.mean(mape_CVs),
           'mase_SNAIVE':np.mean(mase_CVs),
           'fc_SNAIVE':y_hat,
           }


In [233]:
# mase example 1
y_train = np.random.randint(10, size=10)
y_true = np.random.randint(10, size=3)
y_pred = np.random.randint(10, size=3)
print(y_train,y_true,y_pred,sep="\n")

mase_sp = 2

# seasonal naive 
y_true_naive = y_train[mase_sp:]
y_pred_naive = y_train[:-mase_sp]
mae_naive = mean_absolute_error(y_true_naive, y_pred_naive)

# model mae
mae_pred = round(mean_absolute_error(y_true, y_pred),3)

# mase
mase = mae_pred / mae_naive

#
print(f'''
      y_true_naive  {y_true_naive}
      y_pred_naive  {y_pred_naive}
      mae_naive     {mae_naive} 
      mae_pred      {mae_pred} 
      mase              {mase}
      ''', sep="\n")


[4 6 9 5 0 4 5 9 6 7]
[7 9 7]
[5 5 2]

      y_true_naive  [9 5 0 4 5 9 6 7]
      y_pred_naive  [4 6 9 5 0 4 5 9]
      mae_naive     3.625 
      mae_pred      3.667 
      mase              1.0115862068965518
      


## fit on store data

In [234]:
all_stores_result_CV = []
for store in df_store["store_id"].unique():
    model_name = "store_" + str(store)

    # data
    df_data = df_store[df_store["store_id"] == store].set_index("date")[["sales"]]
    df_data = preprocessing(df_data)

    # fit model to each store
    model = NaiveForecaster(sp=7, strategy="mean") # best strategy
    cv_score = cross_validation_result(data=df_data, model=model, model_name=model_name, horizon=7)

    # result
    all_stores_result_CV.append(cv_score)
all_stores_result_CV = pd.DataFrame(all_stores_result_CV)


  values = pd.Int64Index(values, dtype=np.int)
  assert isinstance(by, (int, np.integer, pd.Int64Index)), type(by)
  if not hasattr(x, "freq") or x.freq is None:
  by *= x.freq
  if hasattr(x, "freqstr"):
  if x.freqstr is None:
  elif "-" in x.freqstr:
  return x.freqstr
  if hasattr(x, "freqstr"):
  if x.freqstr is None:
  elif "-" in x.freqstr:
  return x.freqstr
  values = pd.Int64Index(values, dtype=np.int)
  assert isinstance(by, (int, np.integer, pd.Int64Index)), type(by)
  if not hasattr(x, "freq") or x.freq is None:
  by *= x.freq
  if hasattr(x, "freqstr"):
  if x.freqstr is None:
  elif "-" in x.freqstr:
  return x.freqstr
  if hasattr(x, "freqstr"):
  if x.freqstr is None:
  elif "-" in x.freqstr:
  return x.freqstr
  values = pd.Int64Index(values, dtype=np.int)
  assert isinstance(by, (int, np.integer, pd.Int64Index)), type(by)
  if not hasattr(x, "freq") or x.freq is None:
  by *= x.freq
  if hasattr(x, "freqstr"):
  if x.freqstr is None:
  elif "-" in x.freqstr:
  return

## result

In [235]:
all_stores_result_CV.to_pickle('results/f8/SNAIVE_7.pkl')
all_stores_result_CV.head()


Unnamed: 0,store,sales,mae_SNAIVE,rmse_SNAIVE,mape_SNAIVE,mase_SNAIVE,fc_SNAIVE
0,store_307222,date 2021-01-25 24.848540 2021-01-26 18....,7.82875,10.076,0.2515,0.509,2021-01-25 17.433143 2021-01-26 19.11446...
1,store_307244,date 2021-01-25 17.889600 2021-01-26 18....,5.05825,6.30275,0.2595,0.39525,2021-01-25 14.368380 2021-01-26 14.75122...
2,store_307248,date 2021-01-25 21.487100 2021-01-26 23....,7.88775,9.269,0.383,0.75725,2021-01-25 12.218554 2021-01-26 12.39495...
3,store_320264,date 2021-01-25 12.109300 2021-01-26 11....,5.3755,7.82425,0.3055,0.539,2021-01-25 10.469960 2021-01-26 10.82101...
4,store_328165,date 2021-01-25 41.708122 2021-01-26 5...,32.49675,45.4095,0.443,1.3275,2021-01-25 28.437616 2021-01-26 28.82474...


In [236]:
all_stores_result_CV.describe()


Unnamed: 0,mae_SNAIVE,rmse_SNAIVE,mape_SNAIVE,mase_SNAIVE
count,38.0,38.0,38.0,38.0
mean,8.907645,11.243112,0.418559,0.898513
std,7.784758,10.157101,0.114154,0.437712
min,2.8145,3.54375,0.22825,0.39525
25%,3.648625,4.848813,0.344812,0.592875
50%,5.500625,6.92125,0.413875,0.811625
75%,11.495188,13.977063,0.479063,1.09225
max,38.767,48.55925,0.78825,2.65825
