In [1]:
import numpy as np
import pandas as pd

# Forecasting model (ARIMA is the one tuned in this notebook)
from sktime.forecasting.arima import ARIMA
# Time-series plotting helper
from sktime.utils.plotting import plot_series
# Train/test splitters and evaluation utilities for time-series cross-validation
from sktime.split import temporal_train_test_split, ExpandingWindowSplitter, SlidingWindowSplitter, SingleWindowSplitter
from sktime.forecasting.model_evaluation import evaluate
from sktime.performance_metrics.forecasting import MeanSquaredError, MeanAbsoluteError, MeanAbsolutePercentageError # MSE, MAE and MAPE metrics
# Grid search over hyperparameter combinations
from sktime.forecasting.model_selection import ForecastingGridSearchCV

import pandas_datareader.data as web

# Plotting setup
import matplotlib.pyplot as plt

# Suppress every warning category (Warning is the base class of all warning categories)
import warnings
warnings.simplefilter(action='ignore', category=Warning)
# Explicitly suppress statsmodels' ValueWarning and ConvergenceWarning as well.
# NOTE(review): these two filters look redundant given the blanket filter above,
# since any warning category must derive from Warning; they are harmless.
from statsmodels.tools.sm_exceptions import ValueWarning, ConvergenceWarning
warnings.simplefilter('ignore', category=ValueWarning)
warnings.simplefilter('ignore', category=ConvergenceWarning)

In [2]:
# Download the FRED series 'GDP' starting at 1995-01-01.
# NOTE(review): no `end` date is passed, so the sample presumably extends to the
# latest available observation and results can change between runs; confirm whether
# a fixed end date is wanted for reproducibility.
gdp_levels = web.DataReader(name='GDP', data_source='fred', start='1995-01-01')

# Work with the natural logarithm of the series.
y = np.log(gdp_levels)

# Re-express the date index as quarterly periods.
quarterly_index = y.index.to_period(freq='Q')
y.index = quarterly_index

In [3]:
# Forecasting model whose hyperparameters are tuned
forecaster = ARIMA()

# Cross-validation scheme: expanding window that starts with 80 observations,
# grows by one observation per split and forecasts horizons 1..5 on each split
cv_strategy = ExpandingWindowSplitter(fh=np.arange(1, 6), initial_window=80, step_length=1)

# Candidate model specifications as explicit (order, trend) pairs.
# A dict of two parallel lists ({'order': [...], 'trend': [...]}) is expanded by the
# grid search into the Cartesian product of the lists (every order with every trend),
# not into element-wise pairs. To evaluate exactly the intended pairs, the grid is
# passed as a list of dicts, each describing a single combination.
# The pair ((1, 1, 1), 'c') appeared twice in the original lists and is listed once.
candidate_specs = [
    ((1, 0, 1), 'ct'),
    ((1, 1, 0), 'c'),
    ((1, 1, 1), 'c'),
    ((1, 2, 0), 'n'),
    ((0, 1, 1), 'c'),
    ((0, 2, 0), 'n'),
]
param_grid = [{'order': [order], 'trend': [trend]} for order, trend in candidate_specs]

# Selection criterion: mean squared error (square_root=False, i.e. MSE rather than RMSE)
metric = MeanSquaredError(square_root=False)

# Grid search over the candidate specifications
gscv = ForecastingGridSearchCV(forecaster=forecaster, param_grid=param_grid, cv=cv_strategy, scoring=metric)

gscv.fit(y)

In [4]:
# Show the forecaster selected by the grid search (the best-scoring specification)
best_model = gscv.get_fitted_params()['best_forecaster']
best_model

In [5]:
# Forecasting model whose hyperparameters are tuned
forecaster = ARIMA()

# Cross-validation scheme: expanding window that starts with 80 observations,
# grows by one observation per split and forecasts horizons 1..5 on each split
cv_strategy = ExpandingWindowSplitter(fh=np.arange(1, 6), initial_window=80, step_length=1)

# Candidate model specifications as explicit (order, trend) pairs.
# A dict of two parallel lists ({'order': [...], 'trend': [...]}) is expanded by the
# grid search into the Cartesian product of the lists (every order with every trend),
# not into element-wise pairs. To evaluate exactly the intended pairs, the grid is
# passed as a list of dicts, each describing a single combination.
# The pair ((1, 1, 1), 'c') appeared twice in the original lists and is listed once.
candidate_specs = [
    ((1, 0, 1), 'ct'),
    ((1, 1, 0), 'c'),
    ((1, 1, 1), 'c'),
    ((1, 2, 0), 'n'),
    ((0, 1, 1), 'c'),
    ((0, 2, 0), 'n'),
]
param_grid = [{'order': [order], 'trend': [trend]} for order, trend in candidate_specs]

# Selection criterion: mean absolute error
metric = MeanAbsoluteError()

# Grid search over the candidate specifications
gscv = ForecastingGridSearchCV(forecaster=forecaster, param_grid=param_grid, cv=cv_strategy, scoring=metric)

gscv.fit(y)

In [6]:
# Show the forecaster selected by the grid search (the best-scoring specification)
best_model = gscv.get_fitted_params()['best_forecaster']
best_model

In [11]:
# Estimation summary of the forecaster selected by the grid search.
# NOTE(review): the saved output of this cell reports SARIMAX(2, 0, 2) fitted on 1362
# observations, which is not one of the orders in the grid above, and the execution
# count jumps from 6 to 11. The output looks stale; restart the kernel and re-run all
# cells, then drop this note.
fitted_params = gscv.get_fitted_params()
fitted_params['best_forecaster'].summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,1362.0
Model:,"SARIMAX(2, 0, 2)",Log Likelihood,5791.529
Date:,"Tue, 25 Mar 2025",AIC,-11573.057
Time:,10:57:45,BIC,-11546.974
Sample:,03-31-1995,HQIC,-11563.293
,- 03-31-2021,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,0.0066,0.012,0.569,0.569,-0.016,0.029
ar.L2,0.9933,0.012,85.278,0.000,0.970,1.016
ma.L1,0.9859,0.013,78.012,0.000,0.961,1.011
ma.L2,0.4976,0.010,48.032,0.000,0.477,0.518
sigma2,1.199e-05,1.92e-07,62.351,0.000,1.16e-05,1.24e-05

0,1,2,3
Ljung-Box (L1) (Q):,9.45,Jarque-Bera (JB):,84244.36
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.9,Skew:,3.15
Prob(H) (two-sided):,0.24,Kurtosis:,41.01
