In [1]:
import numpy as np
import pandas as pd

# Forecasting model (ARIMA is used as the example)
from sktime.forecasting.arima import ARIMA
# Time-series plotting helper
from sktime.utils.plotting import plot_series
# Cross-validation utilities: train/test split and window splitters
from sktime.split import temporal_train_test_split, ExpandingWindowSplitter, SlidingWindowSplitter, SingleWindowSplitter
from sktime.forecasting.model_evaluation import evaluate
from sktime.performance_metrics.forecasting import MeanSquaredError, MeanAbsoluteError, MeanAbsolutePercentageError # MSE, MAE, MAPE metrics
# Grid search over hyperparameters
from sktime.forecasting.model_selection import ForecastingGridSearchCV

import pandas_datareader.data as web

# Plotting
import matplotlib.pyplot as plt

# Silence all warnings (every warning category derives from Warning)
import warnings
warnings.simplefilter(action='ignore', category=Warning)
# Silence statsmodels' ValueWarning and ConvergenceWarning explicitly.
# NOTE(review): the blanket filter above already matches these categories,
# so the two filters below only matter if that blanket filter is removed.
from statsmodels.tools.sm_exceptions import ValueWarning, ConvergenceWarning
warnings.simplefilter('ignore', category=ValueWarning)
warnings.simplefilter('ignore', category=ConvergenceWarning)

In [2]:
# Download the FRED series 'M2SL' starting from 1995-01-01.
# No end date is given, so the sample depends on when the cell is run.
m2_level = web.DataReader(name='M2SL', data_source='fred', start='1995-01-01')

# Work with the natural logarithm of the series
y = np.log(m2_level)

# Replace the index with a monthly PeriodIndex
monthly_index = y.index.to_period(freq='M')
y.index = monthly_index

In [3]:
# Forecasting model whose hyperparameters are tuned below
forecaster = ARIMA()

# Cross-validation scheme: expanding training window that starts with the
# first 100 observations and moves forward 5 observations per fold; each fold
# is scored on forecasts 1 to 5 steps ahead.
cv_strategy = ExpandingWindowSplitter(fh=np.arange(1, 6), initial_window=100, step_length=5)

# Hyperparameter grid as a dict: we vary `order` and `trend`.
# A dict grid is expanded as a Cartesian product (every order is combined
# with every trend), so each value must be listed only once -- a repeated
# value makes the search cross-validate identical candidates twice.
# NOTE(review): if specific (order, trend) pairs were intended instead of the
# full product, pass a list of single-combination dicts, e.g.
# [{'order': [(2,0,2)], 'trend': ['ct']}, {'order': [(2,1,0)], 'trend': ['c']}, ...]
param_grid = {'order': [(2,0,2), (2,1,0), (2,1,1), (1,2,0)], 'trend': ['ct', 'c', 'n']}

# Selection metric: mean squared error (square_root=False, i.e. MSE, not RMSE)
metric = MeanSquaredError(square_root=False)

# Grid search over param_grid with the cross-validation scheme above
gscv = ForecastingGridSearchCV(forecaster=forecaster, param_grid=param_grid, cv=cv_strategy, scoring=metric)

gscv.fit(y)

In [4]:
# Best model found by the grid search
fitted_params = gscv.get_fitted_params()
fitted_params['best_forecaster']

In [5]:
# Forecasting model whose hyperparameters are tuned below
forecaster = ARIMA()

# Cross-validation scheme: sliding training window of a fixed 100 observations
# that moves forward 5 observations per fold; each fold is scored on forecasts
# 1 to 5 steps ahead.
# The window size must be given through `window_length`. Passing only
# `initial_window=100` (as for ExpandingWindowSplitter) sizes just the first
# fold and leaves every later fold at the default window_length of 10
# observations, which is too short to estimate these ARIMA specifications.
cv_strategy = SlidingWindowSplitter(fh=np.arange(1, 6), window_length=100, step_length=5)

# Hyperparameter grid as a dict: we vary `order` and `trend`.
# A dict grid is expanded as a Cartesian product (every order is combined
# with every trend), so each value must be listed only once -- a repeated
# value makes the search cross-validate identical candidates twice.
# NOTE(review): if specific (order, trend) pairs were intended instead of the
# full product, pass a list of single-combination dicts, e.g.
# [{'order': [(2,0,2)], 'trend': ['ct']}, {'order': [(2,1,0)], 'trend': ['c']}, ...]
param_grid = {'order': [(2,0,2), (2,1,0), (2,1,1), (1,2,0)], 'trend': ['ct', 'c', 'n']}

# Selection metric: mean squared error (square_root=False, i.e. MSE, not RMSE)
metric = MeanSquaredError(square_root=False)

# Grid search over param_grid with the cross-validation scheme above
gscv = ForecastingGridSearchCV(forecaster=forecaster, param_grid=param_grid, cv=cv_strategy, scoring=metric)

gscv.fit(y)

In [6]:
# Best model found by the grid search
fitted_params = gscv.get_fitted_params()
fitted_params['best_forecaster']

In [7]:
# Estimation summary of the best model selected by the grid search
best_model = gscv.get_fitted_params()['best_forecaster']
best_model.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,361.0
Model:,"SARIMAX(2, 1, 1)",Log Likelihood,1439.269
Date:,"Tue, 25 Mar 2025",AIC,-2868.538
Time:,10:57:04,BIC,-2849.108
Sample:,01-31-1995,HQIC,-2860.812
,- 01-31-2025,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0026,0.005,0.503,0.615,-0.007,0.013
ar.L1,0.2826,2.690,0.105,0.916,-4.991,5.556
ar.L2,0.2066,1.687,0.122,0.903,-3.100,3.513
ma.L1,0.3557,2.694,0.132,0.895,-4.924,5.635
sigma2,1.965e-05,6.51e-07,30.200,0.000,1.84e-05,2.09e-05

0,1,2,3
Ljung-Box (L1) (Q):,0.04,Jarque-Bera (JB):,7354.39
Prob(Q):,0.85,Prob(JB):,0.0
Heteroskedasticity (H):,2.27,Skew:,2.38
Prob(H) (two-sided):,0.0,Kurtosis:,24.63
