In [1]:
import numpy as np
import pandas as pd

# Модель для прогнозирования, например ARIMA
from sktime.forecasting.arima import ARIMA
# Визуализация временных рядов
from sktime.utils.plotting import plot_series
# Модули для кросс-валидации
from sktime.split import temporal_train_test_split, ExpandingWindowSplitter, SlidingWindowSplitter, SingleWindowSplitter
from sktime.forecasting.model_evaluation import evaluate
from sktime.performance_metrics.forecasting import MeanSquaredError, MeanAbsoluteError, MeanAbsolutePercentageError # Метрики MSE, MAE, MAPE

import pandas_datareader.data as web

# настройки визуализации
import matplotlib.pyplot as plt

# Не показывать Warnings
import warnings
warnings.simplefilter(action='ignore', category=Warning)
# Не показывать ValueWarning, ConvergenceWarning из statsmodels
from statsmodels.tools.sm_exceptions import ValueWarning, ConvergenceWarning
warnings.simplefilter('ignore', category=ValueWarning)
warnings.simplefilter('ignore', category=ConvergenceWarning)

In [2]:
y = np.log(web.DataReader(name='GDP', data_source='fred', start='1995-01-01'))
y.index = y.index.to_period(freq='Q')
# длина ряда
len(y)

120

In [3]:
forecaster = ARIMA(order=(1,0,1), trend='ct')

cv_strategy = SingleWindowSplitter(fh=np.arange(1, 11), window_length=110)

# инициализируем метрики
metric = [MeanSquaredError(square_root=False), MeanAbsoluteError(), MeanAbsolutePercentageError()]

cv_res = evaluate(forecaster=forecaster, y=y, cv=cv_strategy, strategy="refit", return_data=False, scoring=metric)
cv_res

Unnamed: 0,test_MeanSquaredError,test_MeanAbsoluteError,test_MeanAbsolutePercentageError,fit_time,pred_time,len_train_window,cutoff
0,0.00298,0.050684,0.004941,0.551495,0.010757,110,2022Q2


In [4]:
forecaster = ARIMA(order=(1,1,0), trend='c')

cv_strategy = SingleWindowSplitter(fh=np.arange(1, 11), window_length=110)

# инициализируем метрики
metric = [MeanSquaredError(square_root=False), MeanAbsoluteError(), MeanAbsolutePercentageError()]

cv_res = evaluate(forecaster=forecaster, y=y, cv=cv_strategy, strategy="refit", return_data=False, scoring=metric)
cv_res

Unnamed: 0,test_MeanSquaredError,test_MeanAbsoluteError,test_MeanAbsolutePercentageError,fit_time,pred_time,len_train_window,cutoff
0,0.000521,0.021771,0.002123,0.038296,0.010718,110,2022Q2


In [6]:
forecaster = ARIMA(order=(1,0,1), trend='ct')

cv_strategy = SlidingWindowSplitter(fh=np.arange(1, 6), initial_window=80, step_length=1)

# инициализируем метрики
metric = [MeanSquaredError(square_root=False), MeanAbsoluteError(), MeanAbsolutePercentageError()]

cv_res = evaluate(forecaster=forecaster, y=y, cv=cv_strategy, strategy="refit", return_data=False, scoring=metric)
cv_res

Unnamed: 0,test_MeanSquaredError,test_MeanAbsoluteError,test_MeanAbsolutePercentageError,fit_time,pred_time,len_train_window,cutoff
0,0.000456,0.017357,0.001767,0.139841,0.008613,80,2014Q4
1,0.000235,0.012614,0.001283,0.129532,0.008407,10,2015Q1
2,0.00035,0.017254,0.001755,0.122698,0.008377,10,2015Q2
3,0.00036,0.01876,0.001907,0.132433,0.008357,10,2015Q3
4,0.00022,0.014554,0.001478,0.090716,0.008354,10,2015Q4
5,5e-05,0.00691,0.000701,0.058031,0.008462,10,2016Q1
6,8e-06,0.002459,0.000249,0.073341,0.008212,10,2016Q2
7,9.1e-05,0.007349,0.000743,0.097579,0.008462,10,2016Q3
8,0.000181,0.010422,0.001052,0.0811,0.008234,10,2016Q4
9,0.000116,0.00877,0.000884,0.099168,0.008458,10,2017Q1


In [7]:

# средняя MSE, MAE, MAPE
cv_res.iloc[:,:len(metric)].mean()

test_MeanSquaredError               0.002658
test_MeanAbsoluteError              0.035909
test_MeanAbsolutePercentageError    0.003560
dtype: float64

In [8]:
# Зададим список из специфицированных моделей прогнозирования
forecasters = [ARIMA(order=(1,0,1), trend='ct'), ARIMA(order=(1,1,0), trend='c'), ARIMA(order=(1,1,1), trend='n'), ARIMA(order=(1,2,0), trend='n')]

# специфицируем метод кросс-валидации. Например, SlidingWindowSplitter
cv_strategy = SlidingWindowSplitter(fh=np.arange(1, 6), initial_window=80, step_length=1)

# инициализируем метрики
metric = [MeanSquaredError(square_root=False), MeanAbsoluteError(), MeanAbsolutePercentageError()]

# датафрейм с метриками по столбцам
cv_data = pd.DataFrame(data=None, columns=['MSE', 'MAE', 'MAPE'])

for model in forecasters:
	print(model)
	cv_res = evaluate(forecaster=model, y=y, cv=cv_strategy, strategy="refit", return_data=False, scoring=metric)
	# print(df.iloc[:,:len(metric)].mean()) # метрики для каждой модели
	cv_data.loc[len(cv_data.index)] = cv_res.iloc[:,[0,1,2]].mean().values

# результаты кросс-валидации в виде датафрейма
cv_data


ARIMA(order=(1, 0, 1), trend='ct')
ARIMA(order=(1, 1, 0), trend='c')
ARIMA(order=(1, 1, 1), trend='n')
ARIMA(order=(1, 2, 0), trend='n')


Unnamed: 0,MSE,MAE,MAPE
0,0.002658,0.035909,0.00356
1,0.003601,0.031448,0.003127
2,0.005739,0.040858,0.004059
3,0.021456,0.041858,0.004171


In [9]:
# Индекс модели
for i in range(cv_data.shape[1]):
	print(f'{cv_data.columns[i]}: model #={cv_data.iloc[:,i].argmin()}')

MSE: model #=0
MAE: model #=1
MAPE: model #=1


In [10]:
# Зададим список из специфицированных моделей прогнозирования
forecasters = [ARIMA(order=(1,0,1), trend='ct'), ARIMA(order=(1,1,0), trend='c'), ARIMA(order=(1,1,1), trend='n'), ARIMA(order=(1,2,0), trend='n')]

# специфицируем метод кросс-валидации. Например, SlidingWindowSplitter
cv_strategy = ExpandingWindowSplitter(fh=np.arange(1, 6), initial_window=80, step_length=1)

# инициализируем метрики
metric = [MeanSquaredError(square_root=False), MeanAbsoluteError(), MeanAbsolutePercentageError()]

# датафрейм с метриками по столбцам
cv_data = pd.DataFrame(data=None, columns=['MSE', 'MAE', 'MAPE'])

for model in forecasters:
	print(model)
	cv_res = evaluate(forecaster=model, y=y, cv=cv_strategy, strategy="refit", return_data=False, scoring=metric)
	# print(df.iloc[:,:len(metric)].mean()) # метрики для каждой модели
	cv_data.loc[len(cv_data.index)] = cv_res.iloc[:,[0,1,2]].mean().values

# результаты кросс-валидации в виде датафрейма
cv_data


ARIMA(order=(1, 0, 1), trend='ct')
ARIMA(order=(1, 1, 0), trend='c')
ARIMA(order=(1, 1, 1), trend='n')
ARIMA(order=(1, 2, 0), trend='n')


Unnamed: 0,MSE,MAE,MAPE
0,0.001791,0.028439,0.002831
1,0.00339,0.025859,0.002575
2,0.004383,0.028392,0.002829
3,0.005903,0.030223,0.003013


In [11]:
# Индекс модели
for i in range(cv_data.shape[1]):
	print(f'{cv_data.columns[i]}: model #={cv_data.iloc[:,i].argmin()}')

MSE: model #=0
MAE: model #=1
MAPE: model #=1


In [12]:
# Зададим список из специфицированных моделей прогнозирования
forecasters = [ARIMA(order=(1,0,1), trend='ct'), ARIMA(order=(1,1,0), trend='c'), ARIMA(order=(1,1,1), trend='n'), ARIMA(order=(1,2,0), trend='n')]

# специфицируем метод кросс-валидации. Например, SlidingWindowSplitter
cv_strategy = SingleWindowSplitter(fh=np.arange(1, 11), window_length=110)

# инициализируем метрики
metric = [MeanSquaredError(square_root=False), MeanAbsoluteError(), MeanAbsolutePercentageError()]

# датафрейм с метриками по столбцам
cv_data = pd.DataFrame(data=None, columns=['MSE', 'MAE', 'MAPE'])

for model in forecasters:
	print(model)
	cv_res = evaluate(forecaster=model, y=y, cv=cv_strategy, strategy="refit", return_data=False, scoring=metric)
	# print(df.iloc[:,:len(metric)].mean()) # метрики для каждой модели
	cv_data.loc[len(cv_data.index)] = cv_res.iloc[:,[0,1,2]].mean().values

# результаты кросс-валидации в виде датафрейма
cv_data


ARIMA(order=(1, 0, 1), trend='ct')
ARIMA(order=(1, 1, 0), trend='c')
ARIMA(order=(1, 1, 1), trend='n')
ARIMA(order=(1, 2, 0), trend='n')


Unnamed: 0,MSE,MAE,MAPE
0,0.00298,0.050684,0.004941
1,0.000521,0.021771,0.002123
2,0.000418,0.019466,0.001898
3,0.001522,0.03229,0.003145


In [13]:
# Индекс модели
for i in range(cv_data.shape[1]):
	print(f'{cv_data.columns[i]}: model #={cv_data.iloc[:,i].argmin()}')

MSE: model #=2
MAE: model #=2
MAPE: model #=2
