In [7]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

.resample() можно использовать только с индексом типа DatetimeIndex, поэтому индексируем датафрейм по Date

In [8]:
df = pd.read_csv('train.csv', index_col='Date', parse_dates=['Date'])
df.drop(['store', 'product'], axis = 1, inplace = True)
df = df.resample(rule='D').sum()

df.shape

(3287, 1)

Для дальнейшей работы с датасетом преобразуем столбец с датами обратно

In [9]:
df = df.reset_index()

приведение к типу, подходящему для statsforecast:
 

In [10]:
df.rename(columns={'Date': 'ds', 'number_sold': 'y'}, inplace=True)
df['unique_id'] = 'series_1'
df.head()

Unnamed: 0,ds,y,unique_id
0,2010-01-01,54870,series_1
1,2010-01-02,54960,series_1
2,2010-01-03,54863,series_1
3,2010-01-04,54926,series_1
4,2010-01-05,54634,series_1


In [11]:
train = df.loc[df['ds'] < '2017-12-31']
test = df.loc[(df['ds'] >= '2017-12-31') & (df['ds'] < '2018-12-31')]

horizont = test['ds'].unique()

print(f'Train data shape: {train.shape}')
print(f'Test data shape: {test.shape}')
print(f'Horizont shape: {horizont.shape}')

Train data shape: (2921, 3)
Test data shape: (365, 3)
Horizont shape: (365,)


In [13]:
from statsforecast import StatsForecast
from statsforecast.models import Naive, SeasonalNaive, WindowAverage, SeasonalWindowAverage

model = StatsForecast(models = [Naive()], 
                                # SeasonalNaive(season_length=7), 
                                # WindowAverage(window_size=7),  
                                # SeasonalWindowAverage(window_size=4, season_length=7)], 
                    freq='D', n_jobs=-1)

model.fit(train)

StatsForecast(models=[Naive])

In [14]:
forecasts_df = model.forecast(df=test, h=365)
forecasts_df['original_y'] = test['y'].values
forecasts_df.head()



Unnamed: 0_level_0,ds,Naive,original_y
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
series_1,2018-12-31,55355.0,55255
series_1,2019-01-01,55355.0,55103
series_1,2019-01-02,55355.0,54995
series_1,2019-01-03,55355.0,54837
series_1,2019-01-04,55355.0,54975


# MAPE

In [15]:
def mape(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [17]:
methods = {
    'Naive': forecasts_df['Naive']
    # 'SeasonalNaive': forecasts_df['SeasonalNaive'],
    # 'WindowAverage': forecasts_df['WindowAverage'],
    # 'SeasonalWindowAverage': forecasts_df['SeasWA']
}

for method_name, predictions in methods.items():
    error = mape(forecasts_df['original_y'], predictions)
    print(f"MAPE {method_name}: {error:.2f}%")


MAPE Naive: 0.65%


In [18]:
import joblib

joblib.dump(model, 'naive_model.pkl')

['model.pkl']