In [None]:
# Mount Google Drive so the project directory under /content/drive is reachable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


### install & import

In [None]:
!pip3 install torch
!pip3 install torchvision
!pip3 install pytorch-lightning
!pip install pytorch_forecasting

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch-lightning
  Downloading pytorch_lightning-1.8.3.post1-py3-none-any.whl (798 kB)
[K     |████████████████████████████████| 798 kB 5.1 MB/s 
Collecting lightning-utilities==0.3.*
  Downloading lightning_utilities-0.3.0-py3-none-any.whl (15 kB)
Collecting torchmetrics>=0.7.0
  Downloading torchmetrics-0.11.0-py3-none-any.whl (512 kB)
[K     |████████████████████████████████| 512 kB 61.7 MB/s 
Collecting tensorboardX>=2.2
  Downloading tensorboardX-2.5.1-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 81.7 MB/s 
[?25hCollecting fire
  Downloading fire-0.4.0.tar.gz (87 kB)
[K     |████████████████████████████████| 87 kB 6.7 MB/s 
Building wheels

In [None]:
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch
import os

In [None]:
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss, RMSE
#from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [None]:
from pytorch_forecasting.metrics import MultiHorizonMetric
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

class TweedieLoss(MultiHorizonMetric):
    """
    Tweedie loss.

    Tweedie regression with log-link. It might be useful, e.g., for modeling total
    loss in insurance, or for any target that might be tweedie-distributed.

    The loss will take the exponential of the network output before it is returned as prediction.
    Target normalizer should therefore have no "reverse" transformation, e.g.
    for the :py:class:`~data.timeseries.TimeSeriesDataSet` initialization, one could use:

    .. code-block:: python

        from pytorch_forecasting import TimeSeriesDataSet, EncoderNormalizer

        dataset = TimeSeriesDataSet(
            target_normalizer=EncoderNormalizer(transformation=dict(forward=torch.log1p))
        )

    Note that in this example, the data is log1p-transformed before being normalized but not re-transformed.
    The TweedieLoss applies this "exp"-re-transformation on the network output after it has been de-normalized.
    The result is the model prediction.
    """

    def __init__(self, reduction="mean", p: float = 1.5, **kwargs):
        """
        Args:
            reduction (str, optional): How to reduce the loss. Defaults to "mean".
            p (float, optional): tweedie variance power, which must satisfy
                ``1.0 <= p < 2.0``. Close to ``2`` shifts to
                Gamma distribution and close to ``1`` shifts to Poisson distribution.
                Defaults to 1.5.
            **kwargs: forwarded unchanged to the parent metric constructor.
        """
        super().__init__(reduction=reduction, **kwargs)
        # The interval is half-open: p == 2 is rejected, p == 1 is accepted.
        assert 1 <= p < 2, "p must be in range [1, 2)"
        self.p = p

    def to_prediction(self, out: Dict[str, torch.Tensor]):
        """
        Convert network output into a prediction on the original scale.

        The parent class's point prediction is interpreted as a log-rate and is
        exponentiated (log-link).
        """
        return torch.exp(super().to_prediction(out))

    def loss(self, y_pred, y_true):
        """
        Element-wise Tweedie loss for a log-link prediction.

        Args:
            y_pred: network output; reduced to a point value by the parent's
                ``to_prediction`` and treated as the log of the predicted rate.
            y_true: observed targets.

        Returns:
            Unreduced loss tensor, ``-a + b`` with
            ``a = y * exp(eta * (1 - p)) / (1 - p)`` and
            ``b = exp(eta * (2 - p)) / (2 - p)``, where ``eta`` is the log-rate.
        """
        log_rate = super().to_prediction(y_pred)

        if self.p == 1:
            # (1 - p) is zero here, so the general formula would divide by zero
            # and yield inf/NaN. Use the p -> 1 limit instead, dropping the term
            # that does not depend on the prediction: the Poisson loss.
            return -y_true * log_rate + torch.exp(log_rate)

        a = y_true * torch.exp(log_rate * (1 - self.p)) / (1 - self.p)
        b = torch.exp(log_rate * (2 - self.p)) / (2 - self.p)
        return -a + b


### load data

In [None]:
# Work from the project folder on Drive so the relative paths used below resolve.
PROJECT_DIR = "/content/drive/MyDrive/소캡디/code"
os.chdir(PROJECT_DIR)

##### training data (encode data) : 모델 load 용도로 사용

In [None]:
# Load the pre-built feature grid.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
grid_pickle_path = "./data_use/CA_1_final_grid_df.pkl"
data = pd.read_pickle(grid_pickle_path)

In [None]:
# Preview the loaded grid (the notebook display truncates it to the first/last rows).
data

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,sales,release,sell_price,...,rolling_mean_60,rolling_std_60,rolling_mean_180,rolling_std_180,enc_cat_id_mean,enc_cat_id_std,enc_dept_id_mean,enc_dept_id_std,enc_item_id_mean,enc_item_id_std
0,HOBBIES_1_008_CA_1_evaluation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,1,12.0,0,0.459961,...,,,,,1.003906,3.115234,1.259766,3.533203,7.285156,9.179688
1,HOBBIES_1_009_CA_1_evaluation,HOBBIES_1_009,HOBBIES_1,HOBBIES,CA_1,CA,1,2.0,0,1.559570,...,,,,,1.003906,3.115234,1.259766,3.533203,1.178711,2.013672
2,HOBBIES_1_010_CA_1_evaluation,HOBBIES_1_010,HOBBIES_1,HOBBIES,CA_1,CA,1,0.0,0,3.169922,...,,,,,1.003906,3.115234,1.259766,3.533203,0.716797,0.919434
3,HOBBIES_1_012_CA_1_evaluation,HOBBIES_1_012,HOBBIES_1,HOBBIES,CA_1,CA,1,0.0,0,5.980469,...,,,,,1.003906,3.115234,1.259766,3.533203,0.392090,0.646973
4,HOBBIES_1_015_CA_1_evaluation,HOBBIES_1_015,HOBBIES_1,HOBBIES,CA_1,CA,1,4.0,0,0.700195,...,,,,,1.003906,3.115234,1.259766,3.533203,6.015625,7.324219
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4873634,FOODS_3_823_CA_1_evaluation,FOODS_3_823,FOODS_3,FOODS,CA_1,CA,1969,,127,2.980469,...,1.083008,1.356445,1.588867,1.838867,2.419922,5.882812,3.119141,7.214844,0.822266,1.382812
4873635,FOODS_3_824_CA_1_evaluation,FOODS_3_824,FOODS_3,FOODS,CA_1,CA,1969,,0,2.480469,...,0.883301,1.462891,0.294434,0.937500,2.419922,5.882812,3.119141,7.214844,0.717773,1.205078
4873636,FOODS_3_825_CA_1_evaluation,FOODS_3_825,FOODS_3,FOODS,CA_1,CA,1969,,1,3.980469,...,1.133789,1.016602,0.950195,1.115234,2.419922,5.882812,3.119141,7.214844,0.964844,1.354492
4873637,FOODS_3_826_CA_1_evaluation,FOODS_3_826,FOODS_3,FOODS,CA_1,CA,1969,,211,1.280273,...,0.966797,1.301758,1.061523,1.415039,2.419922,5.882812,3.119141,7.214844,1.576172,1.843750


In [None]:
# The rolling-window features contain NaN (visible on the earliest rows of the
# preview); replace those with 0 so downstream consumers get finite values.
ROLLING_WINDOWS = (7, 14, 30, 60, 180)
values = {
    f"rolling_{stat}_{window}": 0
    for stat in ("mean", "std")
    for window in ROLLING_WINDOWS
}
data.fillna(values, inplace=True)

# Identifier columns that are not used as model inputs below.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
data.drop(columns=['id', 'store_id', 'state_id'], inplace=True, errors="ignore")

# The small offset keeps the logarithm finite for zero-sales rows.
data["log_sales"] = np.log(data["sales"] + 0.0001)

In [None]:
# Keep only rows with observed sales (rows without a sales value are the
# held-out test period) and renumber the index from 0.
# The original called reset_index() without assigning the result, so the index
# was never actually reset.
encode_data = data.dropna(subset=['sales']).reset_index(drop=True)
encode_data

Unnamed: 0,item_id,dept_id,cat_id,d,sales,release,sell_price,price_max,price_min,price_std,...,rolling_std_60,rolling_mean_180,rolling_std_180,enc_cat_id_mean,enc_cat_id_std,enc_dept_id_mean,enc_dept_id_std,enc_item_id_mean,enc_item_id_std,log_sales
0,HOBBIES_1_008,HOBBIES_1,HOBBIES,1,12.0,0,0.459961,0.500000,0.419922,0.019760,...,0.000000,0.000000,0.000000,1.003906,3.115234,1.259766,3.533203,7.285156,9.179688,2.484915
1,HOBBIES_1_009,HOBBIES_1,HOBBIES,1,2.0,0,1.559570,1.769531,1.559570,0.032745,...,0.000000,0.000000,0.000000,1.003906,3.115234,1.259766,3.533203,1.178711,2.013672,0.693197
2,HOBBIES_1_010,HOBBIES_1,HOBBIES,1,0.0,0,3.169922,3.169922,2.970703,0.046356,...,0.000000,0.000000,0.000000,1.003906,3.115234,1.259766,3.533203,0.716797,0.919434,-9.210340
3,HOBBIES_1_012,HOBBIES_1,HOBBIES,1,0.0,0,5.980469,6.519531,5.980469,0.115967,...,0.000000,0.000000,0.000000,1.003906,3.115234,1.259766,3.533203,0.392090,0.646973,-9.210340
4,HOBBIES_1_015,HOBBIES_1,HOBBIES,1,4.0,0,0.700195,0.720215,0.680176,0.011337,...,0.000000,0.000000,0.000000,1.003906,3.115234,1.259766,3.533203,6.015625,7.324219,1.386319
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4788262,FOODS_3_823,FOODS_3,FOODS,1941,2.0,127,2.980469,2.980469,2.480469,0.152222,...,1.242188,1.555664,1.825195,2.419922,5.882812,3.119141,7.214844,0.822266,1.382812,0.693197
4788263,FOODS_3_824,FOODS_3,FOODS,1941,0.0,0,2.480469,2.679688,2.470703,0.086365,...,1.166992,0.133301,0.696289,2.419922,5.882812,3.119141,7.214844,0.717773,1.205078,-9.210340
4788264,FOODS_3_825,FOODS_3,FOODS,1941,1.0,1,3.980469,4.378906,3.980469,0.189697,...,0.986328,0.850098,1.075195,2.419922,5.882812,3.119141,7.214844,0.964844,1.354492,0.000100
4788265,FOODS_3_826,FOODS_3,FOODS,1941,1.0,211,1.280273,1.280273,1.280273,0.000000,...,1.396484,1.016602,1.404297,2.419922,5.882812,3.119141,7.214844,1.576172,1.843750,0.000100


In [None]:
# Split the training frame into one frame per department, dropping the
# category-/department-level columns from each per-department frame.
dept_arr = data["dept_id"].unique()

dept_level_cols = ['cat_id', 'enc_cat_id_std', 'enc_cat_id_mean',
                   'dept_id', 'enc_dept_id_std', 'enc_dept_id_mean']

encode_data_arr = [
    encode_data.loc[encode_data["dept_id"] == dept_name].drop(columns=dept_level_cols)
    for dept_name in dept_arr
]

##### test data (new_prediction_data)

In [None]:
# Most recent 90 days (encoder history) + 28 days (forecast horizon).
history_days = 90
horizon_days = 28
first_excluded_day = data["d"].max() - history_days - horizon_days

# Build the frame in one chain of non-mutating calls. The original filled NaNs
# in place on a slice of `data`, which triggered SettingWithCopyWarning, and
# discarded the result of reset_index().
# NOTE(review): log_sales is filled with 0 while sales is filled with 0, although
# log(0 + 0.0001) is about -9.21 elsewhere in this notebook — confirm this is intended.
new_prediction_data = (
    data.loc[data["d"] > first_excluded_day]
    .fillna({'sales': 0, 'log_sales': 0})
    .reset_index(drop=True)
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)


In [None]:
# One prediction frame per department, in the same order as dept_arr, with the
# category-/department-level columns removed.
dropped_for_prediction = ['cat_id', 'enc_cat_id_std', 'enc_cat_id_mean',
                          'dept_id', 'enc_dept_id_std', 'enc_dept_id_mean']

predict_data_arr = [
    new_prediction_data.loc[new_prediction_data["dept_id"] == dept_name]
    .drop(columns=dropped_for_prediction)
    for dept_name in dept_arr
]

##### evaluation_data & weight

In [None]:
# Load the evaluation frame (one row per item, one d_<day> column per day).
# NOTE: unpickling can execute arbitrary code; only load files you trust.
eval_pickle_path = "./data_use/test_CA_1.pkl"
eval_data = pd.read_pickle(eval_pickle_path)

In [None]:
# Preview the evaluation frame (the notebook display truncates it to the first/last rows).
eval_data

Unnamed: 0,item_id,dept_id,cat_id,d_1942,d_1943,d_1944,d_1945,d_1946,d_1947,d_1948,...,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969
0,HOBBIES_1_001,HOBBIES_1,HOBBIES,2,0,1,0,0,1,4,...,2,1,2,0,0,1,0,1,3,1
1,HOBBIES_1_002,HOBBIES_1,HOBBIES,0,2,0,1,0,1,0,...,1,0,0,1,0,0,2,1,1,0
2,HOBBIES_1_003,HOBBIES_1,HOBBIES,0,0,0,0,0,1,0,...,1,3,2,1,0,2,1,0,1,1
3,HOBBIES_1_004,HOBBIES_1,HOBBIES,0,0,1,0,6,3,3,...,3,3,4,2,1,6,3,1,4,3
4,HOBBIES_1_005,HOBBIES_1,HOBBIES,2,0,1,1,2,4,0,...,0,1,2,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3044,FOODS_3_823,FOODS_3,FOODS,0,5,1,0,0,4,0,...,2,3,5,1,2,1,5,1,2,2
3045,FOODS_3_824,FOODS_3,FOODS,1,1,0,1,3,1,0,...,0,3,0,3,1,0,6,0,0,1
3046,FOODS_3_825,FOODS_3,FOODS,1,0,2,0,1,1,2,...,3,1,1,1,2,2,0,0,0,0
3047,FOODS_3_826,FOODS_3,FOODS,5,0,3,0,5,0,0,...,1,2,1,3,6,3,2,1,4,3


### load best model

In [None]:
# Container for the restored models; filled by the loading loop below.
loaded_model_arr = list()

In [None]:
# Maximum prediction/decoder length (choose this not too short as it can help convergence).
max_prediction_length = 28

# Maximum history length the time series dataset encodes.
# A full year of history ran out of RAM here -> would need a local environment.
max_encoder_length = 90

# Last day that belongs to the training set: everything up to the final
# `max_prediction_length` days of the grid.
last_day = data["d"].max()
training_cutoff = last_day - max_prediction_length

In [None]:
# Rebuild one TemporalFusionTransformer per department and restore its saved
# weights. The TimeSeriesDataSet is constructed only so that `from_dataset`
# can derive the model configuration from it.

# Start from an empty list so re-running this cell does not append duplicates.
loaded_model_arr = []

# Iterate over the per-department frames instead of a hard-coded count of 7.
for i, dept_frame in enumerate(encode_data_arr):
  training = TimeSeriesDataSet(
      dept_frame[lambda x: x["d"] <= training_cutoff],
      time_idx="d",
      target="sales",
      group_ids=["item_id"],
      min_encoder_length= 1,
      max_encoder_length= max_encoder_length,
      min_prediction_length= 1,
      max_prediction_length=max_prediction_length,
      static_categoricals=["item_id"], 
      static_reals=["release","price_max","price_min","price_std","price_mean","price_nunique","enc_item_id_mean", "enc_item_id_std"],
      time_varying_known_categoricals=["snap_CA", "snap_TX","snap_WI","tm_w_end","event_name_1","event_name_2","event_type_1","event_type_2"], 
      time_varying_known_reals=["d","tm_d","tm_w",'tm_m',"tm_y","tm_wm","tm_dw","sell_price","price_norm","price_momentum","price_momentum_m","price_momentum_y",
                              "rolling_mean_7", "rolling_mean_14", "rolling_mean_30", "rolling_mean_60", "rolling_mean_180",
                              "rolling_std_7","rolling_std_14", "rolling_std_30", "rolling_std_60","rolling_std_180"],
      time_varying_unknown_categoricals=[],
      time_varying_unknown_reals=["sales","log_sales"],
      add_relative_time_idx=True,
      add_target_scales=True,
      add_encoder_length=True,
  )

  loaded_model = TemporalFusionTransformer.from_dataset(
      training,
      learning_rate=0.001,
      hidden_size=80,  # NOTE(author): unsure whether this is the "state size" hyperparameter
      attention_head_size=4,
      dropout=0.1,
      hidden_continuous_size=16,
      output_size=1,  # switched to point prediction, hence a single output
      loss= TweedieLoss(),
      log_interval=10,  # log every 10 batches
      reduce_on_plateau_patience=4,
  )

  # map_location="cpu" lets checkpoints that were saved from a GPU be restored
  # on a CPU-only runtime; load_state_dict then copies the values into the
  # freshly built model's parameters.
  # NOTE: torch.load unpickles the file; only load checkpoints you trust.
  checkpoint_path = './model_build/tft_best_model_1203_dept'+str(i)+'.pt'
  state_dict = torch.load(checkpoint_path, map_location="cpu")
  loaded_model.load_state_dict(state_dict)
  loaded_model.eval()  # inference mode (disables dropout)

  loaded_model_arr.append(loaded_model)


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


In [None]:
# Drop the name bound to the full training frame; the per-department frames in
# encode_data_arr were created as separate objects and remain available.
del encode_data

### test data 예측

In [None]:
# Start the prediction table as an independent copy of the evaluation frame so
# that writing forecasts into it leaves eval_data untouched.
pred_data = eval_data.copy(deep=True)

In [None]:
# Forecast every item with its department's model and write the forecast into
# the matching d_<day> columns of pred_data.
first_forecast_day = 1942  # first day column of the evaluation window (d_1942)
forecast_cols = ['d_' + str(first_forecast_day + idx) for idx in range(max_prediction_length)]

# The evaluation columns are shown as integers in the preview; cast them to
# float once up front so float forecasts can be stored without relying on
# implicit per-assignment dtype upcasting.
pred_data = pred_data.astype({col: float for col in forecast_cols})

# Pair each department frame with its model instead of indexing a hard-coded range(7).
for dept_frame, dept_model in zip(predict_data_arr, loaded_model_arr):
    for item in dept_frame['item_id'].unique():
        pred_item = dept_frame.loc[dept_frame["item_id"] == item]
        pred_sales = dept_model.predict(pred_item)

        # One value per forecast day, taken from the first (only) series in the batch.
        item_forecast = [float(value) for value in pred_sales[0][:max_prediction_length]]

        # Build the row mask once and assign the whole horizon in a single
        # write, instead of recomputing the mask for every forecast day.
        item_rows = pred_data['item_id'] == item
        pred_data.loc[item_rows, forecast_cols] = item_forecast

In [None]:
"""
for item in item_id_li:
  pred_item = new_prediction_data.loc[new_prediction_data["item_id"]==item]
  pred_sales = loaded_model.predict(pred_item)
  for idx in range(28):
    new_prediction_data.loc[(new_prediction_data['item_id'] == item) & (new_prediction_data['d'] == 1942+idx), 'sales'] = np.exp(float(pred_sales[0][idx]))
"""

'\nfor item in item_id_li:\n  pred_item = new_prediction_data.loc[new_prediction_data["item_id"]==item]\n  pred_sales = loaded_model.predict(pred_item)\n  for idx in range(28):\n    new_prediction_data.loc[(new_prediction_data[\'item_id\'] == item) & (new_prediction_data[\'d\'] == 1942+idx), \'sales\'] = np.exp(float(pred_sales[0][idx]))\n'

In [None]:
# Preview the prediction table after the d_<day> columns were overwritten with forecasts.
pred_data

Unnamed: 0,item_id,dept_id,cat_id,d_1942,d_1943,d_1944,d_1945,d_1946,d_1947,d_1948,...,d_1960,d_1961,d_1962,d_1963,d_1964,d_1965,d_1966,d_1967,d_1968,d_1969
0,HOBBIES_1_001,HOBBIES_1,HOBBIES,0.938892,0.945946,0.950324,0.952941,0.980571,1.209886,1.378958,...,1.198216,1.439456,1.357712,1.054245,1.028094,1.041064,1.120881,1.171587,1.355676,1.531903
1,HOBBIES_1_002,HOBBIES_1,HOBBIES,0.204655,0.210812,0.211025,0.219271,0.228486,0.290950,0.304810,...,0.235992,0.317538,0.359703,0.258285,0.267128,0.276135,0.274183,0.282407,0.352704,0.342045
2,HOBBIES_1_003,HOBBIES_1,HOBBIES,0.508579,0.491998,0.489199,0.485918,0.492030,0.650611,0.659826,...,0.659642,0.743484,0.740506,0.546349,0.537359,0.547272,0.579385,0.645642,0.799678,0.789807
3,HOBBIES_1_004,HOBBIES_1,HOBBIES,1.834177,1.775825,1.759428,1.759523,1.818032,1.960325,2.065124,...,1.749107,1.869299,1.661570,1.443096,1.381193,1.349048,1.432248,1.495522,1.728478,2.250199
4,HOBBIES_1_005,HOBBIES_1,HOBBIES,1.028361,1.073501,1.129903,1.219895,1.263530,1.455928,1.485640,...,1.386101,1.510013,1.499971,1.187150,1.145601,1.158088,1.204673,1.255398,1.440577,1.709105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3044,FOODS_3_823,FOODS_3,FOODS,0.756756,0.716694,0.658489,0.639162,0.623507,0.866423,0.872681,...,0.891483,0.953296,1.077440,0.704624,0.689948,0.708497,0.792951,0.865471,1.166057,1.209290
3045,FOODS_3_824,FOODS_3,FOODS,0.487985,0.523533,0.592063,0.615456,0.645580,0.853166,0.891909,...,0.954306,1.064646,1.143783,0.751859,0.746304,0.725804,0.775498,0.823590,0.991498,0.980075
3046,FOODS_3_825,FOODS_3,FOODS,0.821108,0.778104,0.765488,0.772853,0.833689,1.007092,1.122881,...,1.041499,1.215483,1.249034,0.859414,0.809141,0.830558,0.838362,0.935843,1.159525,1.108598
3047,FOODS_3_826,FOODS_3,FOODS,1.105463,1.101823,1.097376,1.106840,1.137650,1.369412,1.341064,...,1.258260,1.425616,1.540167,1.109517,1.075207,1.132282,1.136356,1.190555,1.480545,1.425075


In [None]:
# Persist the per-item forecasts for later evaluation.
predictions_pickle_path = './data_use/pred_data_1203_dept.pkl'
pred_data.to_pickle(predictions_pickle_path)

----------------------------------------------------------------------------------------------------------------------------------------------------------------