In [170]:
import datetime as dt

In [250]:
import json
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

import torch
from chronos import ChronosPipeline

from sklearn.metrics import mean_absolute_error, mean_absolute_error, mean_absolute_percentage_error

In [251]:
import plotly.graph_objects as go

layout_params = {
    'font_color': '#000000',
    'font_family': 'Avenir Next',
    'font_size': 11,
    'margin': {'l': 0, 'r': 0, 't': 80, 'b': 0},
    'paper_bgcolor': '#FFFFFF',
    'plot_bgcolor': '#f5f5f5'
}

In [252]:
pipeline = ChronosPipeline.from_pretrained(
  "amazon/chronos-t5-tiny",
  device_map="mps",
  torch_dtype=torch.bfloat16,
)

In [253]:
# load data

PATH = "data"

sales_df = pd.read_csv(f"{PATH}/shop_sales.csv")
dates_df = pd.read_csv(f"{PATH}/shop_sales_dates.csv")

# preprocces
train = sales_df.merge(dates_df[['date', 'date_id', 'year', 'wm_yr_wk']], on='date_id')
# del dates_df, sales_df

train['store_item_id'] = train.item_id
train.item_id = train.item_id.apply(lambda x: x.split('_')[-1])

train.date = pd.to_datetime(train.date)
train = train.rename(columns={'cnt': 'sales'})

In [169]:
def get_historical_data(item_store_ids: list, data_path: str):
    
    # load data
    train = pd.read_csv(f"{data_path}/shop_sales.csv")
    dates_df = pd.read_csv(f"{data_path}/shop_sales_dates.csv")

    # preprocces
    train = train.merge(dates_df[['date', 'date_id', 'year', 'wm_yr_wk']], on='date_id')
    train['store_item_id'] = train.item_id
    train.item_id = train.item_id.apply(lambda x: x.split('_')[-1])
    
    # get ids train data
    train = train[train.store_item_id.isin(item_store_ids)]
    
    return train

In [None]:
# def preprocess_data(train: pd.DataFrame(), prediction_type: str):
    
#     if prediction_type == 'week':
#         train = train.groupby(['wm_yr_wk', 'store_item_id'], as_index=False).

In [199]:
def get_validation_dates(train: pd.DataFrame(), prediction_length, prediction_type: None):
    
    train.date = pd.to_datetime(train.date)
    end_date = train['date'].max()
    start_date = end_date - dt.timedelta(days=prediction_length-1)
    date_range = pd.date_range(start_date, end_date)

    return date_range

In [195]:
# valid = train[train.date.isin(date_range)].copy()
# train = train[~train.date.isin(date_range)]

In [217]:
def make_predictions(prediction_length: int, train: pd.DataFrame(), date_range: list):
    
    all_predicts = []
    for store_item_id in train.store_item_id.unique():
        context = torch.tensor(df_train["sales"].tolist())
        forecast = pipeline.predict(context, prediction_length)  
        low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)
         
        # result = {
        #     'store_item_id': store_item_id,
        #     'date': date_range.tolist(),
        #     'low': low.tolist(),
        #     'median': median.tolist(),
        #     'high': high.tolist()
        # }
        # all_predicts.append(result) 
        
        data = {
            'date': date_range.tolist(), 
            'low': low.tolist(),
            'median': median.tolist(),
            'high': high.tolist()
            }
        result = pd.DataFrame.from_dict(data)
        result['store_item_id'] = store_item_id
        
        all_predicts.append(result) 
        
    all_predicts = pd.concat(all_predicts)    
          
    return all_predicts

In [236]:

store_item_ids = train.store_item_id.unique()
data_path = "data"
prediction_length = 30

train = get_historical_data(item_store_ids=store_item_ids, data_path=data_path)
date_range = get_validation_dates(train=train, prediction_length=prediction_length, prediction_type=None)

valid = train[train.date.isin(date_range)].copy()
train = train[~train.date.isin(date_range)]

result = make_predictions(prediction_length=prediction_length, train=train, date_range=date_range)


In [237]:
valid = valid.merge(result, on=['store_item_id', 'date'])

In [238]:
mean_absolute_error(valid['cnt'], valid['median'])

np.float64(15.784673581219623)

In [239]:
for store_item_id in train.store_item_id.unique():
    test = valid[valid.store_item_id==store_item_id].copy()
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=test['date'], y=test['cnt'], mode='lines', name='test'))
    fig.add_trace(go.Scatter(x=test['date'], y=test['median'], mode='lines', name='pred'))
    fig.add_trace(go.Scatter(x=test['date'], y=test['low'], mode='lines', name='low'))
    fig.add_trace(go.Scatter(x=test['date'], y=test['high'], mode='lines', name='high'))
    fig.update_layout(**layout_params, width=700).show()

In [None]:
import json
from datetime import timedelta

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

import torch
from chronos import ChronosPipeline


def get_historical_data(item_store_ids: list, data_path: str):
    
    # load data
    train = pd.read_csv(f"{data_path}/shop_sales.csv")
    dates_df = pd.read_csv(f"{data_path}/shop_sales_dates.csv")

    # preprocces
    train = train.merge(dates_df[['date', 'date_id', 'year', 'wm_yr_wk']], on='date_id')
    train['store_item_id'] = train.item_id
    train.item_id = train.item_id.apply(lambda x: x.split('_')[-1])
    
    # get ids train data
    train = train[train.store_item_id.isin(item_store_ids)]
    
    return train


def get_validation_dates(train: pd.DataFrame(), prediction_length, prediction_type: None):
    
    train.date = pd.to_datetime(train.date)
    end_date = train['date'].max()
    start_date = end_date - dt.timedelta(days=prediction_length-1)
    date_range = pd.date_range(start_date, end_date)

    return date_range


def make_predictions(prediction_length: int, train: pd.DataFrame(), date_range: list):
    
    all_predicts = []
    
    pipeline = ChronosPipeline.from_pretrained(
        "amazon/chronos-t5-tiny",
        device_map="mps", #"cpu"
        torch_dtype=torch.bfloat16,
        )
    
    for store_item_id in train.store_item_id.unique():
        context = torch.tensor(df_train["sales"].tolist())
        forecast = pipeline.predict(context, prediction_length)  
        low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)
         
        # result = {
        #     'store_item_id': store_item_id,
        #     'date': date_range.tolist(),
        #     'low': low.tolist(),
        #     'median': median.tolist(),
        #     'high': high.tolist()
        # }
        # all_predicts.append(result) 
        
        data = {
            'date': date_range.tolist(), 
            'low': low.tolist(),
            'median': median.tolist(),
            'high': high.tolist()
            }
        result = pd.DataFrame.from_dict(data)
        result['store_item_id'] = store_item_id
        
        all_predicts.append(result) 
        
    all_predicts = pd.concat(all_predicts)    
          
    return all_predicts

def run():
    store_item_ids = train.store_item_id.unique()
    data_path = "data"
    prediction_length = 30

    train = get_historical_data(item_store_ids=store_item_ids, data_path=data_path)
    date_range = get_validation_dates(train=train, prediction_length=prediction_length, prediction_type=None)

    valid = train[train.date.isin(date_range)].copy()
    train = train[~train.date.isin(date_range)]

    result = make_predictions(prediction_length=prediction_length, train=train, date_range=date_range)


         date       low     median        high store_item_id
0  2015-12-23  5.909453   8.991706   15.104337   STORE_2_085
1  2015-12-24  5.909453  11.520624   16.807574   STORE_2_085
2  2015-12-25  4.712000   9.985982   15.199442   STORE_2_085
3  2015-12-26  3.968455   7.003156   15.000587   STORE_2_085
4  2015-12-27  3.916580   7.500294   14.006312   STORE_2_085
..        ...       ...        ...         ...           ...
25 2016-01-17  0.000001   0.720725   79.351621   STORE_2_090
26 2016-01-18  0.000001   0.720725   72.072319   STORE_2_090
27 2016-01-19  0.000001   0.360363   84.396689   STORE_2_090
28 2016-01-20  0.000001   0.720725  123.964391   STORE_2_090
29 2016-01-21  0.000001   1.441448  125.333760   STORE_2_090

[150 rows x 5 columns]


In [294]:
predictions.to_csv("data/predictions.csv", index=False)

In [277]:
pd.date_range(start_date, end_date)

DatetimeIndex(['2016-01-22'], dtype='datetime64[ns]', freq='D')

In [380]:
# Для системы

import json
import pandas as pd
import numpy as np
import torch
from datetime import timedelta
from tqdm.notebook import tqdm
from chronos import ChronosPipeline

class SalesPredictor:
    def __init__(self, data_path: str, prediction_length: int, store_item_ids: list, start_prediction_date: dt.datetime):
        
        self.start_prediction_date = start_prediction_date
        self.store_item_ids = store_item_ids
        self.data_path = data_path
        self.prediction_length = prediction_length
        self.pipeline = ChronosPipeline.from_pretrained(
            "amazon/chronos-t5-tiny",
            device_map="mps", # на маке mps, но надо будет поменять на "cpu"
            torch_dtype=torch.bfloat16,
        )

    def get_historical_data(self) -> pd.DataFrame:
        """Загрузка и предварительная обработка исторических данных."""
        
        #### TO DO - заменить подрузгу данных
        
        train = pd.read_csv(f"{self.data_path}/shop_sales.csv")
        dates_df = pd.read_csv(f"{self.data_path}/shop_sales_dates.csv")
        
        # Объединение и предобработка данных 
        train = train.merge(dates_df[['date', 'date_id', 'year', 'wm_yr_wk']], on='date_id')
        train['store_item_id'] = train.item_id
        train.item_id = train.item_id.apply(lambda x: x.split('_')[-1])
        
        # Фильтрация данных
        train = train[train.store_item_id.isin(self.store_item_ids)]
        
        return train


    def make_predictions(self, train: pd.DataFrame) -> pd.DataFrame:
            """Создание предсказаний для каждого store_item_id."""
            
            all_predicts = []
            
            for store_item_id in train.store_item_id.unique():

                store_item_id_df = check[check.store_item_id == store_item_id].copy()
                store_id = store_item_id_df.store_id.unique()[0]
                item_id = store_item_id_df.item_id.unique()[0]

                full_date_range = pd.date_range(store_item_id_df.date.min(), self.start_prediction_date - timedelta(days=1))
                full_date_range = pd.DataFrame(data=full_date_range, columns=['date'])
                store_item_id_df = store_item_id_df.merge(full_date_range, on='date', how='right')
                
                context = torch.tensor(store_item_id_df["sales"].tolist())
                forecast = self.pipeline.predict(context, self.prediction_length)
                low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)

                # Формирование DataFrame с результатами
                data = {
                    'date': predict_range.tolist(),
                    'low': low.tolist(),
                    'median': median.tolist(),
                    'high': high.tolist()
                }
                result = pd.DataFrame.from_dict(data)
                result['store_item_id'] = store_item_id
                result['store_id'] = store_id
                result['item_id'] = item_id
                
                all_predicts.append(result)

            return pd.concat(all_predicts)

    def run(self):
        """Основной метод для запуска предсказаний."""
        
        train = self.get_historical_data()
        result = self.make_predictions(train=train)
        
        return result

# Пример использования
if __name__ == "__main__":
    data_path = "data"
    ### Достать из теста - минимальная дата, которая есть в тесте
    start_prediction_date = dt.datetime(2015, 12, 23)
    ###
    prediction_length = 28
    ids = [
    'STORE_2_085', 
    'STORE_2_043', 
    'STORE_2_054', 
    'STORE_2_325',
    'STORE_2_090', 
    'Имитация айди без истории'
    ]

    predictor = SalesPredictor(data_path=data_path, prediction_length=prediction_length, store_item_ids=ids, start_prediction_date=start_prediction_date)

    predictions = predictor.run()

# Кластеризация

In [309]:
train

Unnamed: 0,item_id,store_id,date_id,sales,date,year,wm_yr_wk,store_item_id,week
0,085,STORE_2,1,3,2011-01-29,2011,11101,STORE_2_085,5
1,085,STORE_2,2,8,2011-01-30,2011,11101,STORE_2_085,6
2,085,STORE_2,3,0,2011-01-31,2011,11101,STORE_2_085,0
3,085,STORE_2,4,3,2011-02-01,2011,11101,STORE_2_085,1
4,085,STORE_2,5,0,2011-02-02,2011,11101,STORE_2_085,2
...,...,...,...,...,...,...,...,...,...
81850,727,STORE_1,1815,2,2016-01-17,2016,11551,STORE_1_727,6
81851,727,STORE_1,1816,3,2016-01-18,2016,11551,STORE_1_727,0
81852,727,STORE_1,1817,1,2016-01-19,2016,11551,STORE_1_727,1
81853,727,STORE_1,1818,4,2016-01-20,2016,11551,STORE_1_727,2


In [634]:
import pandas as pd
import numpy as np
import torch
from datetime import timedelta
from chronos import ChronosPipeline

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans


    def get_historical_data(self) -> pd.DataFrame:
        """Загрузка и предварительная обработка исторических данных."""
        
        #### TO DO - заменить подрузгу данных
        
        train = pd.read_csv(f"{self.data_path}/shop_sales.csv")
        dates_df = pd.read_csv(f"{self.data_path}/shop_sales_dates.csv")
        
        # Объединение и предобработка данных 
        train = train.merge(dates_df[['date', 'date_id', 'year', 'wm_yr_wk']], on='date_id')
        train['store_item_id'] = train.item_id
        train.item_id = train.item_id.apply(lambda x: x.split('_')[-1])
        
        return train


def select_item_store_id(train):
    
    select = train.groupby(["store_item_id", "item_id"], as_index=False).sales.sum()
    select['max_value'] = select.groupby(["item_id"], as_index=False).sales.transform('max')
    select = select[select.sales == select.max_value].store_item_id.unique()

    return select

def get_clusters(select, train):
    
    item_embeddings = []
    item_ids = []

    for store_item_id in select:
        store_item_id_df = train[train.store_item_id == store_item_id].copy()
        item_id = store_item_id_df.item_id.unique()[0]
        
        context = torch.tensor(store_item_id_df["sales"].tolist())
        embeddings, tokenizer_state = pipeline.embed(context)
        embedding = embeddings[0].mean(axis=0).float().numpy().tolist()
        
        item_ids.append(item_id)
        item_embeddings.append(embedding)
        
    data = np.array(item_embeddings)

    pca = PCA(n_components=2)
    data_reduced = pca.fit_transform(data)

    kmeans = KMeans(n_clusters=7, random_state=42)  
    kmeans.fit(data_reduced)

    # Получаем метки кластеров
    clusters = kmeans.labels_

    df_clusters = pd.DataFrame({
        'item_id': item_ids,  
        'cluster': clusters  
    })
    
    train = train.merge(df_clusters, on='item_id')
    return train

IndentationError: unexpected indent (156000847.py, line 11)

In [645]:
import pandas as pd
import numpy as np
import torch
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from chronos import ChronosPipeline

class ClusterModel:
    def __init__(self, data_path: str, n_clusters: int = 7, pca_components: int = 2):

        self.data_path = data_path
        self.n_clusters = n_clusters
        self.pca_components = pca_components
        self.pipeline = ChronosPipeline.from_pretrained(
            "amazon/chronos-t5-tiny",
            device_map="mps", # на маке mps, но надо будет поменять на "cpu"
            torch_dtype=torch.bfloat16,
        )
        self.train = None

    def get_historical_data(self) -> pd.DataFrame:
        """
        Загрузка и предварительная обработка исторических данных.
        """
        ### Поменять путь
        train = pd.read_csv(f"{self.data_path}/shop_sales.csv")
        dates_df = pd.read_csv(f"{self.data_path}/shop_sales_dates.csv")
        
        # Объединение и предобработка данных 
        train = train.merge(dates_df[['date', 'date_id', 'year', 'wm_yr_wk']], on='date_id')
        train['store_item_id'] = train.item_id
        train.item_id = train.item_id.apply(lambda x: x.split('_')[-1])
        
        self.train = train
        return train

    def select_item_store_ids(self) -> np.ndarray:
        """
        Выбор уникальных store_item_id с максимальными значениями продаж.
        """
        
        select = self.train.groupby(["store_item_id", "item_id"], as_index=False).cnt.sum()
        select['max_value'] = select.groupby("item_id")['cnt'].transform('max')
        select = select[select.cnt == select.max_value].store_item_id.unique()
        
        return select

    def get_embeddings(self, store_item_id: str) -> np.ndarray:
        """
        Получение эмбеддингов для конкретного store_item_id.
        """
        store_item_df = self.train[self.train.store_item_id == store_item_id].copy()
        context = torch.tensor(store_item_df["cnt"].tolist())
        embeddings, _ = self.pipeline.embed(context)
        return embeddings[0].mean(axis=0).float().numpy().tolist()

    def get_clusters(self) -> pd.DataFrame:
        """
        Получение кластеров для товаров.
        """
        
        select_ids = self.select_item_store_ids()
    
        item_embeddings = []
        item_ids = []

        for store_item_id in select_ids:
            item_id = self.train[self.train.store_item_id == store_item_id].item_id.unique()[0]
            embedding = self.get_embeddings(store_item_id)
    
            item_ids.append(item_id)
            item_embeddings.append(embedding)
            
        data = np.array(item_embeddings)
        pca = PCA(n_components=self.pca_components)
        data_reduced = pca.fit_transform(item_embeddings)
        
        kmeans = KMeans(n_clusters=self.n_clusters, random_state=9)
        clusters = kmeans.fit_predict(data_reduced)

        df_clusters = pd.DataFrame({
            'item_id': item_ids,
            'cluster': clusters
        })
        
        self.train = self.train.merge(df_clusters, on='item_id')
        return self.train

    def run(self) -> pd.DataFrame:
        """
        Запуск полного процесса кластеризации.
        """
        self.get_historical_data()
        return self.get_clusters()


# Пример использования
if __name__ == "__main__":
    
    ## поменять путь
    
    data_path = "data"  
    cluster_model = ClusterModel(data_path=data_path, n_clusters=7, pca_components=2)
    result = cluster_model.run()

  item_id store_id  date_id  cnt        date  year  wm_yr_wk store_item_id  \
0     085  STORE_2        1    3  2011-01-29  2011     11101   STORE_2_085   
1     085  STORE_2        2    8  2011-01-30  2011     11101   STORE_2_085   
2     085  STORE_2        3    0  2011-01-31  2011     11101   STORE_2_085   
3     085  STORE_2        4    3  2011-02-01  2011     11101   STORE_2_085   
4     085  STORE_2        5    0  2011-02-02  2011     11101   STORE_2_085   

   cluster  
0        4  
1        4  
2        4  
3        4  
4        4  


In [515]:
colors = ['red', 'blue', 'green', 'yellow', 'orange', 'grey', 'black']
for store_id in train.store_id.unique():
    for cluster in df.cluster.unique():
        fig = go.Figure()
        for product_id in df[df.cluster == cluster].product_id.unique():
            plot = train[(train.item_id == product_id)&(train.store_id==store_id)].copy()
            fig.add_trace(go.Scatter(x=plot['date'], y=plot['sales'], mode='lines', marker_color=colors[cluster]))
        fig.update_layout(**layout_params, width=700, title=f"cluster - {cluster} store - {store_id}").show()

#  Выделение сезонности 

In [595]:
from scipy.fftpack import fft, ifft

def get_historical_data(self) -> pd.DataFrame:
    """Загрузка и предварительная обработка исторических данных."""
    
    #### TO DO - заменить подрузгу данных
    
    train = pd.read_csv(f"{self.data_path}/shop_sales.csv")
    dates_df = pd.read_csv(f"{self.data_path}/shop_sales_dates.csv")
    
    # Объединение и предобработка данных 
    train = train.merge(dates_df[['date', 'date_id', 'year', 'wm_yr_wk']], on='date_id')
    train['store_item_id'] = train.item_id
    train.item_id = train.item_id.apply(lambda x: x.split('_')[-1])
    
    # Фильтрация данных
    train = train[train.store_item_id.isin(self.store_item_ids)]
    
    return train

def get_seasonality_trend(train):

    res = pd.DataFrame()

    for store_item_id in train.store_item_id.unique():
        store_item_id_df = train[train.store_item_id == store_item_id].copy()
        store_item_id_df = store_item_id_df[store_item_id_df.date >= "2015-01-01"]

        window_size = 30  
        trend = store_item_id_df.sales.rolling(window=window_size, center=False).mean()  # Скользящее среднее
        store_item_id_df['trend'] = trend
        store_item_id_df = store_item_id_df.dropna()

        # 2. Выделение сезонности с использованием FFT
        # Преобразуем временной ряд в частотное пространство
        fft_result = fft((store_item_id_df.sales - store_item_id_df.trend).values)

        # Убираем тренд перед применением FFT
        fft_result_filtered = np.copy(fft_result)

        # Оставляем только компоненты с низкими частотами для выделения сезонности
        fft_result_filtered[50:] = 0  # Убираем высокочастотные компоненты

        # Обратное преобразование для получения сезонности
        seasonality = np.real(ifft(fft_result_filtered))
        store_item_id_df['seasonality'] = seasonality
        
        res = pd.concat([res, store_item_id_df])
        
        return res

In [652]:
import pandas as pd
import numpy as np
from scipy.fftpack import fft, ifft

class TimeSeriesAnalyzer:
    def __init__(self, data_path: str, window_size: int = 30, freq_cutoff: int = 50):

        self.data_path = data_path
        self.window_size = window_size
        self.freq_cutoff = freq_cutoff
        self.train = None

    def get_historical_data(self) -> pd.DataFrame:

        # Загрузка данных
        train = pd.read_csv(f"{self.data_path}/shop_sales.csv")
        dates_df = pd.read_csv(f"{self.data_path}/shop_sales_dates.csv")
        
        # Объединение и предобработка данных 
        train = train.merge(dates_df[['date', 'date_id', 'year', 'wm_yr_wk']], on='date_id')
        train['store_item_id'] = train.item_id
        train.item_id = train.item_id.apply(lambda x: x.split('_')[-1])
        
        self.train = train
        return train

    def get_seasonality_trend(self) -> pd.DataFrame:

        res = pd.DataFrame()

        for store_item_id in self.train.store_item_id.unique():
            store_item_id_df = self.train[self.train.store_item_id == store_item_id].copy()
            store_item_id_df = store_item_id_df[store_item_id_df.date >= "2015-01-01"]

            trend = store_item_id_df.cnt.rolling(window=self.window_size, center=False).mean()
            store_item_id_df['trend'] = trend
            store_item_id_df = store_item_id_df.dropna()


            fft_result = fft((store_item_id_df.cnt - store_item_id_df.trend).values)
            fft_result_filtered = np.copy(fft_result)
            fft_result_filtered[self.freq_cutoff:] = 0  
            
            seasonality = np.real(ifft(fft_result_filtered))
            store_item_id_df['seasonality'] = seasonality
            
            res = pd.concat([res, store_item_id_df])
        
        return res

    def run(self) -> pd.DataFrame:

        self.get_historical_data()
        return self.get_seasonality_trend()

# Пример использования
if __name__ == "__main__":
    data_path = "data"  

    analyzer = TimeSeriesAnalyzer(
        data_path=data_path,
        window_size=30,
        freq_cutoff=50
    )
    result = analyzer.run()


     item_id store_id  date_id  cnt        date  year  wm_yr_wk store_item_id  \
1462     085  STORE_2     1463    2  2015-01-30  2015     11452   STORE_2_085   
1463     085  STORE_2     1464    6  2015-01-31  2015     11501   STORE_2_085   
1464     085  STORE_2     1465    4  2015-02-01  2015     11501   STORE_2_085   
1465     085  STORE_2     1466    5  2015-02-02  2015     11501   STORE_2_085   
1466     085  STORE_2     1467    3  2015-02-03  2015     11501   STORE_2_085   

         trend  seasonality  
1462  2.066667     0.843064  
1463  2.266667     0.801512  
1464  2.366667     0.711920  
1465  2.466667     0.610595  
1466  2.566667     0.528621  


In [654]:
from plotly.subplots import make_subplots
for count, store_item_id in enumerate(train.store_item_id.unique()):
    plot = result[result.store_item_id == store_item_id].copy()
    
    fig = make_subplots(rows=3, cols=1)
    fig.add_trace(go.Scatter(x=plot['date'], y=plot['cnt'], mode='lines', marker_color=colors[cluster], name='sales'), row=1, col=1)
    fig.add_trace(go.Scatter(x=plot['date'], y=plot['trend'], mode='lines', marker_color='red', name='trend'), row=2, col=1)
    fig.add_trace(go.Scatter(x=plot['date'], y=plot['seasonality'], mode='lines', marker_color='blue', name='seasonality'), row=3, col=1)
    fig.update_layout(**layout_params, width=1000).show()
    
    if count > 5:
        break
        

# Качество модели

In [597]:
predictions

Unnamed: 0,date,low,median,high,store_item_id,store_id,item_id
0,2015-12-23,7.000591,14.995092,20.366535,STORE_2_085,STORE_2,085
1,2015-12-24,6.499314,14.001183,20.405426,STORE_2_085,STORE_2,085
2,2015-12-25,2.981733,11.991753,18.214500,STORE_2_085,STORE_2,085
3,2015-12-26,2.882342,8.491457,16.014933,STORE_2_085,STORE_2,085
4,2015-12-27,4.018858,9.485369,16.429782,STORE_2_085,STORE_2,085
...,...,...,...,...,...,...,...
23,2016-01-15,0.000001,0.722063,82.387255,STORE_2_090,STORE_2,090
24,2016-01-16,0.000001,0.361032,82.170638,STORE_2_090,STORE_2,090
25,2016-01-17,0.000001,0.000001,65.635421,STORE_2_090,STORE_2,090
26,2016-01-18,0.000001,0.361032,61.591875,STORE_2_090,STORE_2,090


In [623]:
test = train[(train['store_item_id'].isin(predictions.store_item_id.unique()))&(train['date'].isin(predictions.date.unique()))]

In [624]:
predictions_metrics = predictions[predictions.date.isin(test.date.unique())]

In [625]:
predictions_metrics = predictions_metrics.merge(test[['date', 'store_item_id', 'sales']], on=['date', 'store_item_id'])

In [632]:
plot = predictions_metrics.groupby('store_item_id')[['median', 'sales']].sum()
plot['ape'] = abs(plot['sales'] - plot['median']) / plot['sales']

In [633]:
plot['ape'].mean()

np.float64(0.9641477232152743)

In [628]:
predictions_metrics

Unnamed: 0,date,low,median,high,store_item_id,store_id,item_id,sales,ae
0,2015-12-23,7.000591,14.995092,20.366535,STORE_2_085,STORE_2,085,9,0.666121
1,2015-12-24,6.499314,14.001183,20.405426,STORE_2_085,STORE_2,085,4,2.500296
2,2015-12-25,2.981733,11.991753,18.214500,STORE_2_085,STORE_2,085,0,inf
3,2015-12-26,2.882342,8.491457,16.014933,STORE_2_085,STORE_2,085,18,0.528252
4,2015-12-27,4.018858,9.485369,16.429782,STORE_2_085,STORE_2,085,7,0.355053
...,...,...,...,...,...,...,...,...,...
135,2016-01-15,0.000001,0.722063,82.387255,STORE_2_090,STORE_2,090,103,0.992990
136,2016-01-16,0.000001,0.361032,82.170638,STORE_2_090,STORE_2,090,117,0.996914
137,2016-01-17,0.000001,0.000001,65.635421,STORE_2_090,STORE_2,090,106,1.000000
138,2016-01-18,0.000001,0.361032,61.591875,STORE_2_090,STORE_2,090,53,0.993188
