In [3]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt

In [4]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [9]:
class MilkTSDataset(Dataset):
    '''
    Sliding-window time-series dataset for one (`class_name`, `store`, `item`) combination.

    The rows of `data` matching the three identifiers are kept in their original order.
    Sample `idx` is a dict holding, for every column in `features`, the list of values from
    rows `idx .. idx + n_prev_days - 1`, plus the key `'target_price'` with the value of the
    `target` column taken from the row right after that window.
    Use `torch.utils.data.ConcatDataset` to concatenate datasets built for several
    `class_name` / `store` / `item` combinations.

    NOTE(review): rows are not re-sorted here; the windows are only chronological if `data`
    is already ordered by date — confirm against the caller.

    `params`:
        `class_name`:str: class to select (a value of column `class_name` in `data`);
        `store`:int: id of the store whose time series is built (a value of column `store` in `data`);
        `item`:int: id of the product whose time series is built (a value of column `item` in `data`);
        `data`:pd.DataFrame: cleaned dataframe;
        `target`:str: target column contained in `data`;
        `n_prev_days`:int: window length (number of lagged rows per sample);
        `features`:list[str]: feature columns returned for every sample.
    '''
    def __init__(self, class_name:str, store:int, item:int, data:pd.DataFrame,
                 target:str = 'price', n_prev_days:int = 5,
                 features:list[str] = ['cost', 'price', 'sales_units', 'sales_values', 'wasted_units']):
        # Copy the list so the instance never shares the mutable default (or the caller's list).
        self.features = list(features)
        self.n_prev_days = n_prev_days
        self.target = target

        selection = (
            (data['class_name'] == class_name)
            & (data['store'] == store)
            & (data['item'] == item)
        )
        self.data = data[selection]
        # Each sample needs `n_prev_days` rows of history plus one following row for the target.
        self.total_count = max(0, len(self.data) - n_prev_days)

    def __len__(self):
        '''Return the number of complete (window, target) samples.'''
        return self.total_count

    def __getitem__(self, idx):
        '''
        Return sample `idx` as a dict: one list of `n_prev_days` values per feature column
        and the target value under the key `'target_price'`.

        Negative indices count from the end, like for a list.

        `raises`:
            IndexError: if `idx` is outside `[-len(self), len(self) - 1]`.
        '''
        size = len(self)
        position = idx + size if idx < 0 else idx
        # Without this check a negative index would yield an empty window paired with an
        # unrelated target instead of failing or wrapping around.
        if not 0 <= position < size:
            raise IndexError(f'index {idx} is out of range for dataset of size {size}')

        window_end = position + self.n_prev_days
        output = self.data[self.features].iloc[position:window_end].to_dict('list')
        # Assigned last so that, as before, the target wins over a feature of the same name.
        output['target_price'] = self.data.iloc[window_end][self.target]
        return output

In [12]:
# Load the serialized training dataset from disk.
# NOTE(review): torch.load unpickles arbitrary objects — only load files from a trusted source.
train_dataset_path = '../data/input/train_dataset.pt'
train_ds = torch.load(train_dataset_path)

In [8]:
from neuralprophet import NeuralProphet

In [17]:
# Take the frame stored on the first sub-dataset and rename the date / sales columns
# to `ds` / `y` (the names the NeuralProphet calls below are given).
first_train_frame = train_ds.datasets[0].data
train_df = first_train_frame.rename(columns={'date': 'ds', 'sales_units': 'y'})

In [19]:
# Load the serialized test dataset, then rename the date / sales columns of the frame
# stored on its first sub-dataset to `ds` / `y`.
# NOTE(review): torch.load unpickles arbitrary objects — only load files from a trusted source.
test_dataset_path = '../data/input/test_dataset.pt'
test_ds = torch.load(test_dataset_path)
first_test_frame = test_ds.datasets[0].data
test_df = first_test_frame.rename(columns={'date': 'ds', 'sales_units': 'y'})

In [20]:
# Display the row index of `test_df`.
test_df.index

Int64Index([1898, 5465,  457, 3078, 4003, 6052, 1768, 3597, 3375,  747,  741,
            4449,  338, 4100, 1345, 4236, 1760, 2825, 1584, 1669, 6329, 4226,
            3654, 2946, 4797, 2447],
           dtype='int64')

In [21]:
# Identifier, date and target columns: everything else in `train_df` is used as a regressor.
non_feature_cols = {'class_name', 'y', 'ds', 'store', 'price_zone_&_class_name', 'item'}

# Keep the frame's own column order; a set difference would give an arbitrary,
# run-dependent ordering of the regressors.
features = [col for col in train_df.columns if col not in non_feature_cols]
all_cols = features + ['ds', 'y']


def _shift_rows_by_one(frame, columns):
    '''
    Select `columns`, move every value column (including `y`) down by one row relative
    to `ds`, and drop the first row, which holds only NaN after the shift.

    Returns a new frame with `ds` restored as a regular column; `frame` is not modified.
    '''
    return (
        frame[columns]
        .set_index('ds')
        .shift(periods=1)
        .iloc[1:]
        .reset_index()
    )


# NOTE(review): `y` is shifted together with the regressors, so this re-labels the dates
# rather than lagging the features against the target — confirm this is intended.
# NOTE: the names are reassigned from themselves, so re-running this cell shifts the rows again.
train_df = _shift_rows_by_one(train_df, all_cols)
test_df = _shift_rows_by_one(test_df, all_cols)

In [22]:
# Display the full column selection next to the list of regressor names.
all_cols,features

(['price',
  'sales_value',
  'sales_cost_x',
  'sales_cost_y',
  'cost',
  'StoreInventory',
  'ds',
  'y'],
 ['price',
  'sales_value',
  'sales_cost_x',
  'sales_cost_y',
  'cost',
  'StoreInventory'])

In [23]:
# Create a NeuralProphet model with default settings and register every column in
# `features` as a future regressor before fitting.
nprophet_model = NeuralProphet()
for feat in features:
    nprophet_model.add_future_regressor(feat)

# Fit on the prepared training frame, declaring a daily frequency.
metrics = nprophet_model.fit(train_df, freq="D")

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 90.0% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling yearly seasonality. Run NeuralProphet with yearly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 16
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 1000


Missing logger folder: /home/otvc/ottovoncwim/Projects/ArtPrice/notebooks/lightning_logs


Finding best initial lr:   0%|          | 0/203 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

In [24]:
# Build the frame to predict on: the test rows as history, extended by 7 periods,
# with in-sample predictions requested for every test row.
history_df = test_df[features + ['ds', 'y']]
regressor_values = test_df[features]
future_df = nprophet_model.make_future_dataframe(
    history_df,
    regressors_df=regressor_values,
    periods=7,
    n_historic_predictions=len(test_df),
)

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 96.0% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column


# NOTE(review): looks like leftover scratch code — `preds_df_2` is reassigned by the
# `predict` call that follows; confirm `nprophet_model.m` exists before relying on this line.
preds_df_2 = nprophet_model.m(train_df[['ds', 'y']])

In [25]:
# Predict on the future frame, restricted to the date, the target and four regressors.
# NOTE(review): this hard-coded list omits 'price' and 'cost', which are in `features`
# and were registered as regressors — confirm the omission is intended.
prediction_columns = ['ds', 'y', 'StoreInventory', 'sales_value', 'sales_cost_y', 'sales_cost_x']
preds_df_2 = nprophet_model.predict(future_df[prediction_columns])

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 96.875% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 96.875% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D


Predicting: 3it [00:00, ?it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column


In [26]:
# Plot the forecast frame returned by `predict`.
nprophet_model.plot(preds_df_2)

FigureWidgetResampler({
    'data': [{'fill': 'none',
              'line': {'color': 'rgba(45, 146, 255, 1.0)', 'width': 2},
              'mode': 'lines',
              'name': 'yhat1',
              'type': 'scatter',
              'uid': '4e29b737-2d41-438b-bb32-0413d7928595',
              'x': array([datetime.datetime(2023, 2, 15, 0, 0),
                          datetime.datetime(2023, 2, 16, 0, 0),
                          datetime.datetime(2023, 2, 17, 0, 0),
                          datetime.datetime(2023, 2, 18, 0, 0),
                          datetime.datetime(2023, 2, 19, 0, 0),
                          datetime.datetime(2023, 2, 20, 0, 0),
                          datetime.datetime(2023, 2, 21, 0, 0),
                          datetime.datetime(2023, 2, 22, 0, 0),
                          datetime.datetime(2023, 2, 23, 0, 0),
                          datetime.datetime(2023, 2, 24, 0, 0),
                          datetime.datetime(2023, 2, 25, 0, 0),
              