In [45]:
import os
import time
import datetime
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
import argparse

In [42]:
def pre_process(df):
    df = df.rename(columns={
        '年月': 'year_month',
        '销售凭证日期': 'datetime',
        '物料编码': 'unique_id',
        '进单数量': 'y',
        '折前含税金额':'mount'
    })
    df = df.groupby(['year_month','unique_id'], as_index=False).agg({'y':'sum', 'mount':'sum'}) 
    df['price'] = df['mount'] / df['y']
    # df = df[['datetime', 'unique_id', 'price', 'y']]
    df['year_month'] = pd.to_datetime(df['year_month'], format='%Y%m')
    df['year'] = df['year_month'].dt.year
    df['month'] = df['year_month'].dt.month
    # print(df.head())
    df_year = pd.DataFrame({'year':df['year'].unique(),'j':-1})
    df_month =  pd.DataFrame({'month':range(1,13),'j':-1})
    df_id = pd.DataFrame({'unique_id':df['unique_id'].unique(),'j':-1})
    df_year_month = pd.merge(df_year,df_month)
    df_year_month = df_year_month[~((df_year_month['year']==df['year'].max())&(df_year_month['month']>df['year_month'].max().month))]
    df_year_month_id = pd.merge(df_year_month,df_id)[['year','month','unique_id']]
    df = pd.merge(df_year_month_id,df,how='left')
    df['year_month'] = df.apply(lambda x:datetime.date(x.year,x.month,1))
    print(df.sample())
    return 
    df = pd.merge(df3,df,how='left')
    df['y'] =df['y'].fillna(0)
    df = df.sort_values(by=['unique_id','datetime'])
    df = df.groupby(['unique_id'], as_index=False).apply(lambda group: group.ffill())
    df = df.groupby(['unique_id'], as_index=False).apply(lambda group: group.bfill())
    df['time_idx'] = (df['datetime'] -
                      df['datetime'].min()).apply(lambda x: x.days)
    df["month"] = df.datetime.dt.month.astype(str).astype("category")
    df['unique_id'] = df['unique_id'].astype(str).astype("category")
    df['y'] = df['y'].astype('float')
    df["log_y"] = np.log(df.y + 1e-8)
    df["avg_y_by_id"] = df.groupby(["unique_id"],
                                   observed=True).y.transform("mean")
    return df

In [12]:
def bulid_data_loader(data, forecast):
    max_prediction_length = forecast
    max_encoder_length = forecast * 4
    training_cutoff = data["time_idx"].max() - max_prediction_length
    filter_date = training_cutoff-max_encoder_length//2
    id_min_idx = data[data['y']>0].groupby(['unique_id']).time_idx.min()
    # print(id_min_idx[id_min_idx>filter_date].index)
    print(data.shape)
    data = data[~data['unique_id'].isin(id_min_idx[id_min_idx>filter_date].index)]
    print(data.shape)
    training = TimeSeriesDataSet(
        data[lambda x: x.time_idx <= training_cutoff],
        time_idx="time_idx",
        target="y",
        group_ids=["unique_id"],
        min_encoder_length=max_encoder_length //
        2,  # keep encoder length long (as it is in the validation set)
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=max_prediction_length,
        static_categoricals=["unique_id"],
        time_varying_known_categoricals=["month"],
        time_varying_known_reals=["time_idx", "price"],
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=[
            "y",
            "log_y",
            "avg_y_by_id",
        ],
        target_normalizer=GroupNormalizer(
            groups=["unique_id"],
            transformation="softplus"),  # use softplus and normalize by group
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        # allow_missing_timesteps=True
    )
    validation = TimeSeriesDataSet.from_dataset(training, data, predict=True, stop_randomization=True)

    # create dataloaders for model
    batch_size = 64 # set this between 32 to 128
    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
    return training,train_dataloader,val_dataloader

In [43]:
df_full = pd.read_csv('./data/20230316.csv')

In [46]:
df_full = pre_process(df_full)

KeyError: 'year'

In [11]:
df_full[(df_full['unique_id']=='000E155E93AC7F454684F7B48B59C169')&(df_full['y']>0)]

Unnamed: 0,datetime,j,unique_id,price,y,time_idx,month,log_y,avg_y_by_id
2486139,2021-07-02,-1,000E155E93AC7F454684F7B48B59C169,242.805,2.0,182,7,0.6931472,0.067174
2745584,2021-07-21,-1,000E155E93AC7F454684F7B48B59C169,242.8,1.0,201,7,1e-08,0.067174
4343219,2021-11-15,-1,000E155E93AC7F454684F7B48B59C169,242.8,1.0,318,11,1e-08,0.067174
5968164,2022-03-14,-1,000E155E93AC7F454684F7B48B59C169,242.8,1.0,437,3,1e-08,0.067174
6159334,2022-03-28,-1,000E155E93AC7F454684F7B48B59C169,242.8,1.0,451,3,1e-08,0.067174
6377814,2022-04-13,-1,000E155E93AC7F454684F7B48B59C169,242.803333,3.0,467,4,1.098612,0.067174
6459744,2022-04-19,-1,000E155E93AC7F454684F7B48B59C169,242.8,1.0,473,4,1e-08,0.067174
6487054,2022-04-21,-1,000E155E93AC7F454684F7B48B59C169,242.8,1.0,475,4,1e-08,0.067174
6801119,2022-05-14,-1,000E155E93AC7F454684F7B48B59C169,242.8,1.0,498,5,1e-08,0.067174
6814774,2022-05-15,-1,000E155E93AC7F454684F7B48B59C169,242.8,1.0,499,5,1e-08,0.067174


In [13]:
training,train_dataloader,val_dataloader =  bulid_data_loader(df_full, 120)

(10773795, 9)
(8187453, 9)


In [14]:
def train_step_1(training,train_dataloader,val_dataloader):
    # configure network and trainer
    pl.seed_everything(42)
    trainer = pl.Trainer(
        gpus=0,
        # clipping gradients is a hyperparameter and important to prevent divergance
        # of the gradient for recurrent neural networks
        gradient_clip_val=0.1,
    )


    tft = TemporalFusionTransformer.from_dataset(
        training,
        # not meaningful for finding the learning rate but otherwise very important
        learning_rate=0.03,
        hidden_size=16,  # most important hyperparameter apart from learning rate
        # number of attention heads. Set to up to 4 for large datasets
        attention_head_size=4,
        dropout=0.1,  # between 0.1 and 0.3 are good values
        hidden_continuous_size=8,  # set to <= hidden_size
        output_size=7,  # 7 quantiles by default
        loss=QuantileLoss(),
        # reduce learning rate if no improvement in validation loss after x epochs
        reduce_on_plateau_patience=4,
        optimizer='adam'
    )
    print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
    # find optimal learning rate
    res = trainer.tuner.lr_find(
        tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
        max_lr=10.0,
        min_lr=1e-6,
    )

    print(f"suggested learning rate: {res.suggestion()}")
    fig = res.plot(show=True, suggest=True)
    fig.show()