In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the raw table and convert its 'date' column from ISO-style text
# (YYYY-MM-DD) into datetime values in a single chained expression.
df = (
    pd.read_csv('data.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
)

# Формирование таблицы с временными рядами

Каждая строка таблицы соответствует определенной дате, а каждая колонка — отдельному временному ряду.

In [None]:
# Pivot the long-format frame: rows are keyed by `date_parameter`, columns by the
# combination of fields in `complete_divisions_list`, cells hold the summed
# `target_parameter`.
# NOTE(review): complete_divisions_list, target_parameter and date_parameter are
# not defined anywhere in the visible notebook -- they must be assigned in an
# earlier cell, otherwise this cell fails on a fresh kernel.
# The string alias 'sum' is used instead of the np.nansum callable: pandas has
# treated that callable as an alias of its own NaN-skipping sum and warns about
# the aliasing on recent versions, so the alias keeps the result and drops the
# warning.
df_detailed_pivoted = df.pivot_table(index=date_parameter,
                                     columns=complete_divisions_list,
                                     values=target_parameter,
                                     aggfunc='sum')

In [None]:
# Build a gap-free calendar between the first and last row labels of the pivot
# and align the table to it; labels absent from the pivot become rows filled
# with 0.2.
# NOTE(review): freq='M' generates month-end timestamps. Rows whose labels are
# not month-end dates would not survive the reindex -- confirm the input dates
# are month-end aligned.
history_index = df_detailed_pivoted.index
ind = pd.date_range(history_index[0], history_index[-1], freq='M')
df_detailed_pivoted = df_detailed_pivoted.reindex(index=ind, fill_value=0.2)

# Импорт необходимых библиотек

In [None]:
from share_meta_model import ShareMetaModel
from mean_model import ConstantModel, SlidingConstantModel
from quarter_mean_model import QuarterConstantModel
from seasonal_heuristic_model import RobustSeasonalHeuristicModel, SimpleSeasonalHeuristicModel, SlidingSeasonalHeuristicModel
from best_model import BestModelChooser

# Формирование данных

In [None]:
# Raise every value whose magnitude is below 0.2 to exactly 0.2 (this also turns
# small negative values into +0.2), then replace the remaining NaN cells with
# the same 0.2 floor.
is_below_floor = df_detailed_pivoted.abs() < 0.2
df_detailed_pivoted = df_detailed_pivoted.mask(is_below_floor, 0.2).fillna(0.2)

# Создание зоопарка моделей

In [None]:
# One SlidingConstantModel spec per window size (3, 6, 9, 12), keyed
# 'sliding_mean_<window>'; each spec asks for the plain mean over that window.
sliding_mean_models = {
    f'sliding_mean_{window}': {
        'model_type': SlidingConstantModel,
        'model_config': {'mean_window_size': window, 'mean_type': 'mean'},
    }
    for window in (3, 6, 9, 12)
}

In [None]:
# One ConstantModel spec per window size (6, 9, 12), keyed 'median_<window>';
# each spec requests the median as the averaging rule.
median_models = {
    f'median_{window}': {
        'model_type': ConstantModel,
        'model_config': {'mean_window_size': window, 'mean_type': 'median'},
    }
    for window in (6, 9, 12)
}

In [None]:
# One QuarterConstantModel spec for 1, 2 and 3 quarters, keyed 'quarter_<n>'.
# Every spec uses the mean and sets remove_incomplete_latest_quarter=True.
quarter_models = {
    f'quarter_{quarters}': {
        'model_type': QuarterConstantModel,
        'model_config': {
            'n_quarters': quarters,
            'mean_type': 'mean',
            'remove_incomplete_latest_quarter': True,
        },
    }
    for quarters in (1, 2, 3)
}

In [None]:
# RobustSeasonalHeuristicModel specs with symmetric smoothing offsets of
# -h .. +h for h = 1 and h = 2. The key carries the total span 2*h + 1, which
# yields 'seasonal_3' and 'seasonal_5'.
seasonal_models = {
    f'seasonal_{2 * half_window + 1}': {
        'model_type': RobustSeasonalHeuristicModel,
        'model_config': {
            'past_smoothing_offset_size': -half_window,
            'future_smoothing_offset_size': half_window,
            'smoothing_n_years': 1,
            'reconcile_forecasts': True,
            'almost_zero_value': 1e-19,
            'n_years': 1,
            'n_months': 3,
        },
    }
    for half_window in (1, 2)
}

In [None]:
# Merge the four model families into one registry, in the order sliding mean,
# median, quarter, seasonal.
simple_models = {}
for model_family in (sliding_mean_models, median_models, quarter_models, seasonal_models):
    simple_models.update(model_family)

In [None]:
# Final zoo: every simple model plus an extra 'nan' entry that reuses the
# 'median_6' spec (the same dict object, not a copy).
# NOTE(review): presumably 'nan' is the fallback BestModelChooser uses when no
# model can be selected for a series -- confirm against best_model.py.
models_zoo = dict(simple_models, nan=median_models['median_6'])

# Прогноз

In [None]:
# Settings passed to BestModelChooser, collected in one place so they are easy
# to tune. Their exact meaning is defined by BestModelChooser in best_model.py,
# not in this notebook.
chooser_settings = {
    'models_zoo': models_zoo,
    'group_columns_to_estimate_mape': [],
    'series': df_detailed_pivoted,
    'horizon': 12,
    'backtest_depth': 6,
    'backtest_horizon': 3,
    'max_n_series_to_process_at_once': 40000,
    'percentage_to_select_best_models': 15,
    'additive_percentage_to_select_best_models': 2,
}

# Build the chooser, fit it, and keep the forecast it returns.
model = BestModelChooser(**chooser_settings)
model.fit()
predictions = model.predict()

# Проверка на наличие пропусков в прогнозе

In [None]:
# Count missing values in each forecast column and list the columns that have
# at least one. The counts get their own name instead of rebinding `df`, so the
# raw input frame stays intact and the earlier cells that read `df` can be
# re-run safely.
nan_count_per_series = predictions.isna().sum(axis=0)
nan_count_per_series[nan_count_per_series > 0].index

# Замена близких к нулю значений прогноза на 0

In [None]:
# Zero out, in place, every forecast whose magnitude does not exceed 0.21.
# NOTE(review): the 0.21 cut-off sits just above the 0.2 floor applied to the
# input series, presumably so forecasts resting on that floor become true
# zeros -- confirm.
is_negligible = predictions.abs() <= 0.21
predictions[is_negligible] = 0.0

# Просмотр списка рядов, для которых прогноз делается заданной моделью

In [None]:
# Model whose assigned series should be listed. 'seasonal_5' is the key built
# for the seasonal spec with past_smoothing_offset_size=-2.
model_name = 'seasonal_5'
model.divisions_for_model[model_name]

# Ошибки всех моделей на данном ряде

In [None]:
# The original cell ended in an empty `.loc[]`, which is a SyntaxError. The
# series to inspect is now an explicit, editable value; by default it is the
# first column of the input table.
# NOTE(review): assumes model.models_errors is row-indexed by the same labels
# as df_detailed_pivoted.columns -- confirm against best_model.py.
series_label = df_detailed_pivoted.columns[0]
model.models_errors.loc[series_label]

# Все модели, вошедшие в ансамбль для данного разреза

Значение `True` означает, что модель входит в ансамбль, значение `False` — что не входит.

In [None]:
# The original cell ended in an empty `.loc[]`, which is a SyntaxError. The
# series (division) to inspect is now an explicit, editable value; by default
# it is the first column of the input table.
# NOTE(review): assumes model.best_models is row-indexed by the same labels as
# df_detailed_pivoted.columns -- confirm against best_model.py.
series_label = df_detailed_pivoted.columns[0]
model.best_models.loc[series_label]