## Import

In [None]:
# Colab only: mount Google Drive so the project folder is reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Work from the project root; the relative paths used below ('./Part2', 'Informer2020') resolve against it.
%cd /content/drive/MyDrive/LG_AIMERS

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1bo8OXmxCtxx_kqaMVbLxgeD7-6fO--3a/LG_AIMERS


In [None]:
import random
import os
# os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from Informer2020.utils.timefeatures import time_features
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## Hyperparameter Setting

In [None]:
CFG = {
    'seq_len':90, # encoder window: 90 days of history per sample
    'label_len':30, # decoder warm-up: the last 30 days of the encoder window are repeated in the decoder input
    'pred_len':21, # forecast horizon: 21 days
    'EPOCHS':20,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':2048,
    'SEED':41,
    'STRIDE':3, # step (in days) between consecutive training windows
    'checkpoint':'ckpt_sewoong_4', # sub-directory of ./checkpoint where weights are saved
    'timeenc':1 # forwarded to time_features(timeenc=...)
}

In [None]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable, NumPy and
    PyTorch (CPU and all CUDA devices), and puts cuDNN into deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # also covers multi-GPU runtimes; silently ignored without CUDA
    torch.backends.cudnn.deterministic = True
    # benchmark mode auto-tunes convolution algorithms per run and can pick different
    # (non-deterministic) kernels, so it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # fix the RNG seeds

## 데이터 불러오기

In [None]:
# Sales table without 'ID' and '제품'. The rest of the notebook treats the first 4 remaining
# columns as categorical metadata and every later column as a date-labelled sales value.
train_data = pd.read_csv('./Part2/train.csv').drop(columns=['ID', '제품'])

In [None]:
# Per-brand keyword counts: the '브랜드' column identifies the brand, and the notebook reads the
# remaining columns by the same date labels as the sales table.
brand = pd.read_csv('./Part2/brand_keyword_cnt.csv')

## 데이터 전처리

In [None]:
## Outlier replacement for the '2023-02-23':'2023-03-28' span

# Rows whose sales sum to zero over column positions 422..455 (34 columns) are treated as a gap.
ids = train_data.index[train_data.iloc[:,422:456].sum(axis=1)==0]
# Reference columns: the 7 positions right before the gap plus positions 457..462 after it.
# NOTE(review): position 456 is neither overwritten nor used as a reference — confirm the skip is intended.
around = list(range(422-7,422))+list(range(457,463))
# Per-row mean over the reference columns, reshaped to (n_rows, 1) so it can be stacked column-wise.
mean = train_data.iloc[ids,around].mean(axis=1)
mean = mean.values.reshape(-1,1)

# Repeat that mean across all 34 gap columns and write it back.
replace_data = np.column_stack([mean for i in range(34)])
train_data.iloc[ids,422:456] = replace_data

In [None]:
# Cap outliers at an upper bound of mean + 3*std, both computed per row over non-zero sales only

m = train_data.iloc[:,4:].where(train_data.iloc[:,4:]!=0).mean(axis=1)
s = train_data.iloc[:,4:].where(train_data.iloc[:,4:]!=0).std(axis=1)

upper = m + 3*s

# Transposed view: one column per product, so `apply` visits one product at a time.
temp = train_data.iloc[:,4:].T
def replace(column,upper):
    """Return `column` with every value above that product's upper bound replaced by the bound.

    The bound is looked up through the column's position in the module-level `temp` frame.
    A NaN bound never compares greater, so such columns come back unchanged.
    """
    limit = upper[temp.columns.get_loc(column.name)]
    return np.where(column>limit, limit, column)

temp = temp.apply(replace, upper=upper)
train_data.iloc[:,4:] = temp.T

In [None]:
# Brand keyword counts: the 35 brands with the most missing values get their whole row
# (every count column) overwritten with the column-wise mean over all brands.
empty_brand = (
    brand.set_index('브랜드')
         .isnull()
         .sum(axis=1)
         .sort_values(ascending=False)
         .index[:35]
)
col = brand.columns[1:]
mean = brand[col].mean()
for b in empty_brand:
    brand.loc[brand['브랜드'].eq(b), col] = mean.to_numpy()

In [None]:
total_scale = True
# Row-wise (one row = one product) min-max scaling of the numeric date columns.
numeric_cols = train_data.columns[4:]

# Columns the per-row min and max are taken from.
if total_scale:
  # the whole date range
  scale_cols = numeric_cols
else:
  # the training period only: the first 80% of the date columns, i.e. the same boundary the
  # train/validation split uses. It is computed here because border1s/border2s are only
  # defined in a later cell and would raise NameError on a fresh top-to-bottom run.
  scale_cols = numeric_cols[:int(len(numeric_cols)*0.8)]
min_values = train_data[scale_cols].min(axis=1)
max_values = train_data[scale_cols].max(axis=1)
# Per-row range (max - min); a zero range is replaced by 1 so the division below is safe
ranges = max_values - min_values
ranges[ranges == 0] = 1  ## the earlier version of this code used 0 here
# apply the min-max scaling
train_data[numeric_cols] = (train_data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)
# keep each row's min and max, keyed by row index, so predictions can be scaled back
scale_min_dict = min_values.to_dict()
scale_max_dict = max_values.to_dict()

In [None]:
## diff_data: difference from the previous day's sales (first difference along the date axis)
# train_diff holds only the date columns (the 4 leading metadata columns are sliced off before
# differencing), so its positional column k corresponds to column k+4 of train_data.
train_diff = (train_data.iloc[:,4:].diff(axis=1)).fillna(0)

In [None]:
# Label Encoding
label_encoder = LabelEncoder()
categorical_columns = ['대분류', '중분류', '소분류']

# The single encoder is re-fitted for each column in turn.
for col in categorical_columns:
    train_data[col] = label_encoder.fit_transform(train_data[col])

# The brand column is fitted last so that the very same mapping can be applied to the brand table.
train_data['브랜드'] = label_encoder.fit_transform(train_data['브랜드'])
brand['브랜드'] = label_encoder.transform(brand['브랜드'])

In [None]:
# Chronological 80/20 split over the date columns (everything after the 4 metadata columns).
num_train = int((train_data.shape[1] - 4)*0.8)
num_vali = (train_data.shape[1] - 4) - num_train
# border1s/border2s hold the [train, valid] start/end offsets; validation starts seq_len
# columns early so that its first window has a full encoder history.
border1s = [0, num_train - CFG['seq_len']]
border2s = [num_train, num_train + num_vali]
set_type = dict(train=0, valid=1)

In [None]:
# Validation frame = metadata columns + date columns from the validation start offset onwards.
val_data = pd.concat(
    [train_data.iloc[:, :4], train_data.iloc[:, 4 + border1s[1]:]],
    axis=1,
)
# Training frame keeps the metadata plus the date columns up to the training end offset.
# This rebinds train_data, so re-running the cell without reloading truncates it again.
train_data = train_data.iloc[:, :4 + border2s[0]]

## make dataset

In [None]:
def make_data(data, seq_len=CFG['seq_len'], pred_len=CFG['pred_len'], label_len=CFG['label_len'], stride=CFG['STRIDE']):
    """Cut every product's series into sliding encoder/decoder windows.

    Reads the module-level `brand` and `train_diff` frames in addition to `data`.

    Args:
        data: DataFrame whose first 4 columns are the encoded category/brand ids (brand id
            last) and whose remaining columns are date-labelled sales values.
        seq_len: length of the encoder window.
        pred_len: forecast horizon; one full window spans seq_len + pred_len columns.
        label_len: number of trailing encoder steps that open the decoder window.
        stride: step between consecutive window starts.

    Returns:
        Tuple (enc_input, dec_input, enc_mark, dec_mark) with one row per (product, window),
        ordered product-major. The input arrays carry 3 channels
        [brand keyword count, sales difference, sales]; the mark arrays carry the time
        features followed by the 4 id columns.
    """
    date_cols = data.columns[4:]
    date = pd.DataFrame({'date':pd.to_datetime(date_cols)})
    data_stamp = time_features(date, timeenc=CFG['timeenc'], freq='d')

    num_rows = len(data)  # number of products
    window_size = seq_len + pred_len
    dec_len = label_len + pred_len

    window_starts = range(0, len(date_cols) - window_size + 1, stride)
    n_windows = len(window_starts)
    n_mark = data_stamp.shape[1] + 4

    enc_input_data = np.empty((num_rows * n_windows, seq_len, 3))
    dec_input_data = np.empty((num_rows * n_windows, dec_len, 3))

    enc_mark_data = np.empty((num_rows * n_windows, seq_len, n_mark))
    dec_mark_data = np.empty((num_rows * n_windows, dec_len, n_mark))

    # Select the differenced series by row and date label. train_diff has no metadata columns
    # and always spans the full date range, so slicing it by position (as with `data`) would
    # shift it against the sales values and, for a later date range, read the wrong period.
    diff_values = train_diff.loc[data.index, date_cols].to_numpy()

    for i in tqdm(range(num_rows)):  # one product at a time
        encode_info = np.array(data.iloc[i, :4])   # category / brand ids
        sales_data = np.array(data.iloc[i, 4:])    # sales values
        temp_brand = brand.loc[brand['브랜드']==encode_info[-1], date_cols].values[0]
        t_diff = diff_values[i]

        assert len(temp_brand)==len(sales_data)
        assert len(t_diff)==len(sales_data)

        # The id columns are constant per product, so build their repeated blocks once.
        enc_info = np.stack([encode_info for _ in range(seq_len)])
        dec_info = np.stack([encode_info for _ in range(dec_len)])

        for idx, j in enumerate(window_starts):
            row = i * n_windows + idx  # output row owned by this (product, window) pair

            s_begin = j
            s_end = s_begin + seq_len
            r_begin = s_end - label_len
            r_end = r_begin + dec_len

            window = slice(j, j + window_size)
            features = np.column_stack((temp_brand[window],
                                        t_diff[window],
                                        sales_data[window]))

            enc_input_data[row] = features[:seq_len]
            dec_input_data[row] = features[-dec_len:]

            # Marks go to the same row as the inputs; indexing them by product alone would
            # leave most rows of the np.empty buffers uninitialised.
            enc_mark_data[row] = np.concatenate([data_stamp[s_begin:s_end], enc_info], axis=1)
            dec_mark_data[row] = np.concatenate([data_stamp[r_begin:r_end], dec_info], axis=1)

    return enc_input_data, dec_input_data, enc_mark_data, dec_mark_data

In [None]:
def make_predict_data(data, seq_len=CFG['seq_len'], pred_len=CFG['pred_len'], label_len=CFG['label_len']):
    """Build one inference sample per product from the last `seq_len` columns of `data`.

    Reads the module-level `brand` and `train_diff` frames in addition to `data`.

    Args:
        data: DataFrame with 4 leading id columns (brand id last) and date-labelled columns after.
        seq_len: encoder window length.
        pred_len: number of future days to create time stamps for.
        label_len: number of trailing encoder steps copied into the decoder input.

    Returns:
        Tuple (enc_input, dec_input, enc_mark, dec_mark). `dec_input` only holds the
        `label_len` known steps, while `dec_mark` spans label_len + pred_len steps.
    """
    # Time stamps: the last seq_len observed dates followed by the next pred_len calendar days.
    date = pd.DataFrame({'date':pd.to_datetime(data.columns[-seq_len:])})
    future_dates = pd.date_range(date.date.values[-1], periods=pred_len+1, freq='d')[1:]
    df_stamp = pd.DataFrame(columns = ['date'])
    df_stamp.date = list(date.date.values) + list(future_dates)
    data_stamp = time_features(df_stamp, timeenc=CFG['timeenc'], freq='d')

    n_items = len(data)
    dec_len = label_len + pred_len
    n_mark = data_stamp.shape[1] + 4

    enc_input_data = np.empty((n_items, seq_len, 3))
    dec_input_data = np.empty((n_items, label_len, 3))
    enc_mark_data = np.empty((n_items, seq_len, n_mark))
    dec_mark_data = np.empty((n_items, dec_len, n_mark))

    # The time features are shared by every product.
    enc_mark = data_stamp[:seq_len]
    dec_mark = data_stamp[seq_len-label_len:]

    for row in tqdm(range(n_items)):
        encode_info = np.array(data.iloc[row, :4])
        sales_window = np.array(data.iloc[row, -seq_len:])[-seq_len:]
        brand_window = brand[brand['브랜드']==encode_info[-1]].values[0][-seq_len:]
        t_diff_window = np.array(train_diff.iloc[row,-seq_len:])[-seq_len:]

        # channels: brand keyword count, sales difference, sales
        enc_input_data[row] = np.column_stack((brand_window[:seq_len],
                                               t_diff_window[:seq_len],
                                               sales_window[:seq_len]))
        dec_input_data[row] = np.column_stack((brand_window[-label_len:],
                                               t_diff_window[-label_len:],
                                               sales_window[-label_len:]))

        # marks = time features followed by the product's 4 ids repeated on every step
        enc_mark_data[row] = np.concatenate([enc_mark, np.stack([encode_info] * seq_len)], axis=1)
        dec_mark_data[row] = np.concatenate([dec_mark, np.stack([encode_info] * dec_len)], axis=1)

    return enc_input_data, dec_input_data, enc_mark_data, dec_mark_data

In [None]:
def make_psfa_data(data, seq_len=CFG['seq_len'], pred_len=CFG['pred_len'], label_len=CFG['label_len']):
    """Build one evaluation sample per product from the last seq_len + pred_len columns of `data`.

    Reads the module-level `brand` and `train_diff` frames in addition to `data`.

    Args:
        data: DataFrame with 4 leading id columns (brand id last) and date-labelled columns after.
        seq_len: encoder window length.
        pred_len: forecast horizon held out at the end of the window.
        label_len: number of trailing encoder steps that open the decoder window.

    Returns:
        Tuple (enc_input, dec_input, enc_mark, dec_mark), one row per product.
    """
    n_items = len(data)  # number of products
    window_size = seq_len + pred_len
    dec_len = label_len + pred_len

    # Time features of the final window only.
    date = pd.DataFrame({'date':pd.to_datetime(data.columns[4:])})
    data_stamp = time_features(date, timeenc=CFG['timeenc'], freq='d')
    data_stamp = data_stamp[-window_size:,:]
    n_mark = data_stamp.shape[1] + 4

    enc_input_data = np.empty((n_items, seq_len, 3))
    dec_input_data = np.empty((n_items, dec_len, 3))
    enc_mark_data = np.empty((n_items, seq_len, n_mark))
    dec_mark_data = np.empty((n_items, dec_len, n_mark))

    # The time features are shared by every product.
    enc_mark = data_stamp[:seq_len]
    dec_mark = data_stamp[seq_len-label_len:]

    for row in tqdm(range(n_items)):
        encode_info = np.array(data.iloc[row, :4])
        sales_window = np.array(data.iloc[row, -window_size:])[-window_size:]
        brand_window = brand[brand['브랜드']==encode_info[-1]].values[0][-window_size:]
        t_diff_window = np.array(train_diff.iloc[row, -window_size:])[-window_size:]

        # channels: brand keyword count, sales difference, sales
        enc_input_data[row] = np.column_stack((brand_window[:seq_len],
                                               t_diff_window[:seq_len],
                                               sales_window[:seq_len]))
        dec_input_data[row] = np.column_stack((brand_window[-dec_len:],
                                               t_diff_window[-dec_len:],
                                               sales_window[-dec_len:]))

        # marks = time features followed by the product's 4 ids repeated on every step
        enc_mark_data[row] = np.concatenate([enc_mark, np.stack([encode_info] * seq_len)], axis=1)
        dec_mark_data[row] = np.concatenate([dec_mark, np.stack([encode_info] * dec_len)], axis=1)

    return enc_input_data, dec_input_data, enc_mark_data, dec_mark_data

In [None]:
# Sliding training windows over the training-period columns.
enc_train_data, dec_train_data, enc_mark_train_data, dec_mark_train_data = make_data(train_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

In [None]:
enc_train_data.shape, dec_train_data.shape, enc_mark_train_data.shape, dec_mark_train_data.shape

((1366540, 90, 3), (1366540, 51, 3), (1366540, 90, 8), (1366540, 51, 8))

In [None]:
# Sliding validation windows over val_data (used for the validation loss).
enc_val_data, dec_val_data, enc_mark_val_data, dec_mark_val_data = make_data(val_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

In [None]:
enc_val_data.shape, dec_val_data.shape, enc_mark_val_data.shape, dec_mark_val_data.shape

((381360, 90, 3), (381360, 51, 3), (381360, 90, 8), (381360, 51, 8))

In [None]:
# Inference inputs: one sample per product built from the last seq_len columns of val_data.
enc_test_data, dec_test_data, enc_mark_test_data, dec_mark_test_data = make_predict_data(val_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

In [None]:
enc_test_data.shape, dec_test_data.shape, enc_mark_test_data.shape, dec_mark_test_data.shape

((15890, 90, 3), (15890, 30, 3), (15890, 90, 8), (15890, 51, 8))

In [None]:
# PSFA evaluation inputs: one sample per product covering the last seq_len + pred_len columns of val_data.
enc_psfa_data, dec_psfa_data, enc_mark_psfa_data, dec_mark_psfa_data = make_psfa_data(val_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

In [None]:
enc_psfa_data.shape, dec_psfa_data.shape, enc_mark_psfa_data.shape, dec_mark_psfa_data.shape

((15890, 90, 3), (15890, 51, 3), (15890, 90, 8), (15890, 51, 8))

## PSFA

In [None]:
# Row indices of the items in each top-level category ('대분류'), keyed by the encoded category id.
indexs_bigcat = {
    bigcat: train_data.index[train_data['대분류']==bigcat].tolist()
    for bigcat in train_data['대분류'].unique()
}

indexs_bigcat.keys()

## the index lists cover the 15890 items, grouped by top-level category

dict_keys([1, 2, 0, 4, 3])

In [None]:
## Use when every item is predicted exactly once, i.e. pred/target have one row per item (15890 rows)

def PSFA(pred, target):
    """Score predictions with the sales-weighted forecast accuracy (higher is better).

    For every top-level category in the module-level `indexs_bigcat` and every forecast day,
    the relative error |target - pred| / max(target, pred) of each item is weighted by the
    item's share of that day's category sales; the averaged weighted error is subtracted from 1.

    Args:
        pred: 2-D array (items x days) of predicted sales.
        target: 2-D array (items x days) of actual sales, same layout as `pred`.

    Returns:
        The score as a float; 1 means no weighted error.
    """
    # Categories come from the index map instead of a hard-coded count of 5; sorting keeps the
    # accumulation order 0, 1, 2, ... so results are unchanged for the usual five categories.
    categories = sorted(indexs_bigcat)
    n_days = CFG['pred_len']
    n_terms = n_days * len(categories)

    score = 1
    for cat in categories:
        ids = indexs_bigcat[cat]
        for day in range(n_days):
            target_values = target[ids, day]   # actual sales of the day
            pred_values = pred[ids, day]       # predicted sales of the day
            total_sell = np.sum(target_values) # total category sales of the day

            # When both actual and predicted sales are 0 the error is taken to be 0.
            denominator = np.maximum(target_values, pred_values)
            with np.errstate(divide='ignore', invalid='ignore'):
                diffs = np.where(denominator!=0, np.abs(target_values - pred_values) / denominator, 0)

            if total_sell != 0:
                sell_weights = target_values / total_sell  # item's share of the day's sales
            else:
                sell_weights = np.ones_like(target_values) / len(ids)  # equal weights on a zero-sales day

            # Only days without NaN errors contribute to the score.
            if not np.isnan(diffs).any():
                score -= np.sum(diffs * sell_weights) / n_terms

    return score

## Custom Dataset

In [None]:
# Enter the Informer2020 folder so that its `data` and `models` packages import directly.
# From here on relative paths resolve against Informer2020, so project files live under '../'.
%cd Informer2020

/content/drive/.shortcut-targets-by-id/1bo8OXmxCtxx_kqaMVbLxgeD7-6fO--3a/LG_AIMERS/Informer2020


In [None]:
from data.data_loader import Dataset_Custom

In [None]:
# Training windows are reshuffled every epoch.
train_dataset = Dataset_Custom(enc_train_data, dec_train_data, enc_mark_train_data, dec_mark_train_data)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=CFG['BATCH_SIZE'], num_workers=0)

# Evaluation loaders keep the stored order (shuffle=False).
val_dataset = Dataset_Custom(enc_val_data, dec_val_data, enc_mark_val_data, dec_mark_val_data)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=CFG['BATCH_SIZE'], num_workers=0)

# One sample per product; the order must be preserved because scores are inverse-scaled by row index.
psfa_dataset = Dataset_Custom(enc_psfa_data, dec_psfa_data, enc_mark_psfa_data, dec_mark_psfa_data)
psfa_loader = DataLoader(psfa_dataset, shuffle=False, batch_size=512, num_workers=0)

test_dataset = Dataset_Custom(enc_test_data, dec_test_data, enc_mark_test_data, dec_mark_test_data)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=CFG['BATCH_SIZE'], num_workers=0)

## 모델 선언

#### parameter

In [None]:
# Number of input channels per time step, read from the last axis of the encoder tensor.
input_size = enc_test_data.shape[-1]
input_size

3

In [None]:
# Arguments forwarded to the Informer constructor in the next cell.
enc_in = input_size  # encoder input channels
dec_in = input_size  # decoder input channels
c_out = 1            # a single output channel
seq_len = CFG['seq_len']
label_len = CFG['label_len']
out_len = CFG['pred_len']
output_attention = False
freq = 'd'
embed = 'timeF'
dropout = 0.1
d_model=512
d_ff = 1024
attn = 'prob' # NOTE(review): 'full' appears to be the alternative value — confirm against the Informer model
device = device  # no-op self-assignment; the device was already chosen above
sep_cat = True   # NOTE(review): not a standard Informer2020 argument — presumably a local extension; confirm

learning_rate = CFG['LEARNING_RATE']

In [None]:
from models.model import Informer

# Instantiate the Informer with the hyper-parameters defined in the cell above.
model = Informer(
    enc_in=enc_in,
    dec_in=dec_in,
    c_out=c_out,
    seq_len=seq_len,
    label_len=label_len,
    out_len=out_len,
    d_model=d_model,
    d_ff=d_ff,
    attn=attn,
    output_attention=output_attention,
    freq=freq,
    embed=embed,
    dropout=dropout,
    sep_cat=sep_cat,
    device=device,
)

## 모델 학습

In [None]:
def process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device):
  """Run one forward pass and return (model outputs, matching targets).

  The decoder input is the first `label_len` steps of `batch_y` followed by zeros for the
  `pred_len` steps to forecast. The target is the last channel of `batch_y` over its final
  `pred_len` steps, moved to `device`.
  """
  pred_len = CFG['pred_len']
  label_len = CFG['label_len']
  batch_y = batch_y.float()

  # decoder input: known prefix + zero placeholder for the horizon
  placeholder = torch.zeros([batch_y.shape[0], pred_len, batch_y.shape[-1]]).float()
  dec_inp = torch.cat([batch_y[:,:label_len,:], placeholder], dim=1).float().to(device)

  # encoder - decoder
  outputs = model(
      batch_x.float().to(device),
      batch_x_mark.float().to(device),
      dec_inp,
      batch_y_mark.float().to(device),
  )

  # keep only the last feature channel as the target
  target = batch_y[:,-pred_len:,-1:].to(device)

  return outputs, target

def adjust_learning_rate(optimizer, epoch):
  """Set every parameter group's learning rate to the base rate halved (epoch - 1) times.

  Args:
      optimizer: optimizer whose `param_groups` are updated in place.
      epoch: 1-based epoch number; epoch 1 keeps the base rate from CFG['LEARNING_RATE'].
  """
  lr = CFG['LEARNING_RATE'] * (0.5 ** ((epoch-1) // 1))
  for param_group in optimizer.param_groups:
      param_group['lr'] = lr
  print('Updating learning rate to {}'.format(lr))

In [None]:
# Name of the sub-directory under ./checkpoint that train() saves weights into.
location = CFG['checkpoint']

In [None]:
import time
from tqdm import tqdm

def get_psfa(model, psfa_loader, device):
  """Predict every sample of `psfa_loader`, undo the scaling, and return the PSFA score.

  Row k of the loader output is inverse-scaled with the min/max stored under key k of the
  module-level scale dictionaries, so the loader must yield items in row order. Predictions
  and targets are rounded to integers before scoring.
  """
  pred_batches = []
  target_batches = []

  with torch.no_grad():
    for batch_x,batch_y,batch_x_mark,batch_y_mark in tqdm(iter(psfa_loader)):
      pred, true = process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device)
      pred_batches.append(pred.cpu().numpy())
      target_batches.append(true.cpu().numpy())

  psfa_pred_list = np.concatenate(pred_batches)
  psfa_target_list = np.concatenate(target_batches)

  # back to the original sales scale, one item at a time
  for idx in range(len(psfa_pred_list)):
    low = scale_min_dict[idx]
    high = scale_max_dict[idx]
    psfa_pred_list[idx, :] = psfa_pred_list[idx, :] * (high - low) + low
    psfa_target_list[idx, :] = psfa_target_list[idx, :] * (high - low) + low

  # whole units, with the trailing channel axis dropped
  psfa_pred_list = np.round(psfa_pred_list, 0).astype(int).squeeze(-1)
  psfa_target_list = np.round(psfa_target_list, 0).astype(int).squeeze(-1)

  return PSFA(psfa_pred_list,psfa_target_list)


def vali(model, val_loader, psfa_loader, criterion, optimizer, device):
  """Evaluate the model: mean batch loss on `val_loader` plus the PSFA score on `psfa_loader`.

  Args:
      model: network to evaluate; switched to eval mode here.
      val_loader: loader of validation windows used for the loss.
      psfa_loader: loader with one sample per product used for the PSFA score.
      criterion: loss function applied to (prediction, target) on the CPU.
      optimizer: unused; kept so existing callers do not have to change.
      device: device the forward pass runs on.

  Returns:
      Tuple (average of the per-batch losses, PSFA score).
  """
  model.eval()
  total_loss = []
  with torch.no_grad():
    for batch_x,batch_y,batch_x_mark,batch_y_mark in tqdm(iter(val_loader)):
        pred, true = process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device)
        loss = criterion(pred.detach().cpu(), true.detach().cpu())
        # store a plain float (as train() does) instead of a tensor
        total_loss.append(loss.item())

  total_loss = np.average(total_loss)
  psfa_score = get_psfa(model, psfa_loader, device)
  return total_loss, psfa_score

def train(model, train_loader, val_loader, psfa_loader, optimizer, criterion, device):
  """Train for CFG['EPOCHS'] epochs and return the model restored to its best-PSFA weights.

  After every epoch the model is validated; the state dict is written to
  './checkpoint/<location>/ckpt_loss.pth' when the validation loss improves and to
  'ckpt_psfa.pth' when the validation PSFA improves.

  Args:
      model: network to train (moved to `device`).
      train_loader: loader of training windows.
      val_loader: loader of validation windows (loss).
      psfa_loader: loader with one sample per product (PSFA).
      optimizer: optimizer already bound to the model parameters.
      criterion: training / validation loss function.
      device: device to train on.

  Returns:
      Tuple (best_psfa_model, train_loss_list, val_loss_list, val_psfa_list).
      `best_psfa_model` is None when no epoch ever reached a PSFA above 0.
  """
  best_loss = 9999999
  best_psfa = 0
  best_psfa_model = None
  train_loss_list = []
  val_loss_list = []
  val_psfa_list = []
  model = model.to(device)

  path = os.path.join('./checkpoint', location)
  if not os.path.exists(path):
      os.makedirs(path)

  train_steps = len(train_loader)

  for epoch in range(CFG['EPOCHS']):
      train_loss = []
      model.train()

      for batch_x,batch_y,batch_x_mark,batch_y_mark in tqdm(iter(train_loader)):

          optimizer.zero_grad()
          pred, true = process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device)
          loss = criterion(pred, true)
          train_loss.append(loss.item())

          loss.backward()
          optimizer.step()

      train_loss = np.average(train_loss)
      val_loss, val_psfa = vali(model, val_loader, psfa_loader, criterion, optimizer, device)

      train_loss_list.append(train_loss)
      val_loss_list.append(val_loss)
      val_psfa_list.append(val_psfa)

      print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Val Loss: {3:.7f} Val PSFA: {4:.7f}".format(epoch + 1, train_steps, train_loss, val_loss, val_psfa))

      if best_loss > val_loss:
          best_loss = val_loss
          torch.save(model.state_dict(), path+'/'+'ckpt_loss.pth')
          print('Loss Model Saved')

      if best_psfa < val_psfa:
          best_psfa = val_psfa
          # Only a reference: later epochs keep updating this same object in place.
          best_psfa_model = model
          torch.save(model.state_dict(), path+'/'+'ckpt_psfa.pth')
          print('PSFA Model Saved')

      #adjust_learning_rate(optimizer, epoch+1)

  # `best_psfa_model` aliases the live model, which now holds the last epoch's weights.
  # Reload the checkpoint written at the best-PSFA epoch so the returned model really is the
  # best one. The guard ensures the checkpoint was written during this run.
  if best_psfa_model is not None:
      best_model_path = path+'/'+'ckpt_psfa.pth'
      best_psfa_model.load_state_dict(torch.load(best_model_path, map_location=device))

  return best_psfa_model, train_loss_list, val_loss_list, val_psfa_list

## Run !!

In [None]:
#0828  (NOTE(review): presumably the date tag of this run — confirm)
# Adam on all model parameters with the configured learning rate, MSE as the training loss.
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
criterion = nn.MSELoss()
# infer_model is whatever train() returns as its best-PSFA model; the lists hold per-epoch metrics.
infer_model, train_loss_list, val_loss_list, val_psfa_list = train(model, train_loader, val_loader, psfa_loader, optimizer, criterion, device)

100%|██████████| 668/668 [09:22<00:00,  1.19it/s]
100%|██████████| 187/187 [01:10<00:00,  2.63it/s]
100%|██████████| 32/32 [00:06<00:00,  4.72it/s]


Epoch: 1, Steps: 668 | Train Loss: 0.0298921 Val Loss: 0.0233423 Val PSFA: 0.6703260
Loss Model Saved
PSFA Model Saved


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.74it/s]
100%|██████████| 32/32 [00:02<00:00, 10.68it/s]


Epoch: 2, Steps: 668 | Train Loss: 0.0222509 Val Loss: 0.0252829 Val PSFA: 0.4872968


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:07<00:00,  2.75it/s]
100%|██████████| 32/32 [00:02<00:00, 10.71it/s]


Epoch: 3, Steps: 668 | Train Loss: 0.0212022 Val Loss: 0.0272048 Val PSFA: 0.5312960


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.75it/s]
100%|██████████| 32/32 [00:02<00:00, 10.67it/s]


Epoch: 4, Steps: 668 | Train Loss: 0.0205665 Val Loss: 0.0253300 Val PSFA: 0.5535748


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:07<00:00,  2.75it/s]
100%|██████████| 32/32 [00:02<00:00, 10.68it/s]


Epoch: 5, Steps: 668 | Train Loss: 0.0202127 Val Loss: 0.0270391 Val PSFA: 0.7140272
PSFA Model Saved


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.75it/s]
100%|██████████| 32/32 [00:02<00:00, 10.67it/s]


Epoch: 6, Steps: 668 | Train Loss: 0.0199687 Val Loss: 0.0282801 Val PSFA: 0.7019429


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.74it/s]
100%|██████████| 32/32 [00:02<00:00, 10.67it/s]


Epoch: 7, Steps: 668 | Train Loss: 0.0198216 Val Loss: 0.0260369 Val PSFA: 0.6607804


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.75it/s]
100%|██████████| 32/32 [00:02<00:00, 10.69it/s]


Epoch: 8, Steps: 668 | Train Loss: 0.0196696 Val Loss: 0.0253075 Val PSFA: 0.6476471


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.75it/s]
100%|██████████| 32/32 [00:03<00:00, 10.65it/s]


Epoch: 9, Steps: 668 | Train Loss: 0.0195750 Val Loss: 0.0255464 Val PSFA: 0.6693244


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.75it/s]
100%|██████████| 32/32 [00:03<00:00, 10.61it/s]


Epoch: 10, Steps: 668 | Train Loss: 0.0194928 Val Loss: 0.0273059 Val PSFA: 0.6569960


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.74it/s]
100%|██████████| 32/32 [00:02<00:00, 10.69it/s]


Epoch: 11, Steps: 668 | Train Loss: 0.0194153 Val Loss: 0.0259514 Val PSFA: 0.6628821


100%|██████████| 668/668 [08:59<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.75it/s]
100%|██████████| 32/32 [00:03<00:00, 10.64it/s]


Epoch: 12, Steps: 668 | Train Loss: 0.0193675 Val Loss: 0.0257072 Val PSFA: 0.6962169


100%|██████████| 668/668 [09:01<00:00,  1.23it/s]
100%|██████████| 187/187 [01:08<00:00,  2.71it/s]
100%|██████████| 32/32 [00:03<00:00, 10.52it/s]


Epoch: 13, Steps: 668 | Train Loss: 0.0192574 Val Loss: 0.0261663 Val PSFA: 0.7086976


100%|██████████| 668/668 [09:01<00:00,  1.23it/s]
100%|██████████| 187/187 [01:08<00:00,  2.72it/s]
100%|██████████| 32/32 [00:03<00:00, 10.58it/s]


Epoch: 14, Steps: 668 | Train Loss: 0.0192160 Val Loss: 0.0282804 Val PSFA: 0.6934472


100%|██████████| 668/668 [09:02<00:00,  1.23it/s]
100%|██████████| 187/187 [01:08<00:00,  2.71it/s]
100%|██████████| 32/32 [00:03<00:00, 10.56it/s]


Epoch: 15, Steps: 668 | Train Loss: 0.0191779 Val Loss: 0.0265436 Val PSFA: 0.6856684


100%|██████████| 668/668 [09:01<00:00,  1.23it/s]
100%|██████████| 187/187 [01:08<00:00,  2.72it/s]
100%|██████████| 32/32 [00:03<00:00, 10.57it/s]


Epoch: 16, Steps: 668 | Train Loss: 0.0191128 Val Loss: 0.0272504 Val PSFA: 0.6884712


100%|██████████| 668/668 [09:01<00:00,  1.23it/s]
100%|██████████| 187/187 [01:08<00:00,  2.71it/s]
100%|██████████| 32/32 [00:03<00:00, 10.52it/s]


Epoch: 17, Steps: 668 | Train Loss: 0.0190552 Val Loss: 0.0273182 Val PSFA: 0.7059108


100%|██████████| 668/668 [09:01<00:00,  1.23it/s]
100%|██████████| 187/187 [01:08<00:00,  2.72it/s]
100%|██████████| 32/32 [00:03<00:00, 10.59it/s]


Epoch: 18, Steps: 668 | Train Loss: 0.0189990 Val Loss: 0.0265878 Val PSFA: 0.7095292


100%|██████████| 668/668 [09:01<00:00,  1.23it/s]
100%|██████████| 187/187 [01:08<00:00,  2.72it/s]
100%|██████████| 32/32 [00:03<00:00, 10.48it/s]


Epoch: 19, Steps: 668 | Train Loss: 0.0189391 Val Loss: 0.0260865 Val PSFA: 0.6537138


100%|██████████| 668/668 [09:00<00:00,  1.24it/s]
100%|██████████| 187/187 [01:08<00:00,  2.72it/s]
100%|██████████| 32/32 [00:03<00:00, 10.52it/s]


Epoch: 20, Steps: 668 | Train Loss: 0.0188854 Val Loss: 0.0270927 Val PSFA: 0.7014424


## 모델 추론

In [None]:
def inference(model, test_loader, device):
  """Return the model's predictions for every sample of `test_loader` as one NumPy array.

  The model is moved to `device` and put in eval mode; gradients are disabled. Samples keep
  the order in which the loader yields them.
  """
  model = model.to(device)
  model.eval()

  with torch.no_grad():
    batch_outputs = [
        process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device)[0].detach().cpu().numpy()
        for batch_x,batch_y,batch_x_mark,batch_y_mark in tqdm(iter(test_loader))
    ]

  # flatten the per-batch arrays into one list of per-sample predictions
  return np.array([sample for batch in batch_outputs for sample in batch])

In [None]:
# Forecast every product with the model returned by train().
pred = inference(infer_model, test_loader, device)

100%|██████████| 8/8 [00:04<00:00,  2.00it/s]


In [None]:
# Drop the trailing single-channel axis so pred is (items, forecast days).
pred = pred.squeeze(-1)
pred.shape

(15890, 21)

In [None]:
# 추론 결과를 inverse scaling
for idx in range(len(pred)):
    pred[idx, :] = pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]

# 결과 후처리
pred = np.round(pred, 0).astype(int)

## Submission

In [None]:
# Submission template; the path is relative to the Informer2020 working directory.
submit = pd.read_csv('../Part2/sample_submission.csv')

In [None]:
# Write the predictions into every column after the first (presumably the ID column — confirm).
submit.iloc[:,1:] = pred

In [None]:
# NOTE(review): this reload replaces the `submit` frame that was just filled from `pred`, so the
# post-processing below runs on the saved file rather than on this run's predictions — confirm intent.
# NOTE(review): the path is relative to the current directory (Informer2020 after the earlier %cd),
# while the final to_csv writes to '../submit_files/' — confirm which directory is meant.
submit = pd.read_csv('./submit_files/submit_informer_0907_notreplace.csv')

In [None]:
submit.describe()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
count,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,...,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0
mean,7944.5,8.025928,8.130522,10.077533,11.348081,11.224544,10.837256,10.383638,10.901007,11.436375,...,10.673065,10.501951,10.452549,11.280931,12.068974,12.164632,12.011391,11.695469,11.335431,11.606797
std,4587.192224,77.715704,74.536339,87.402104,121.375368,105.504345,95.39102,83.507802,85.04179,89.801715,...,79.708594,77.117477,76.055912,81.62795,90.330451,90.647045,87.920297,84.066578,80.096669,81.313968
min,0.0,-47.0,-93.0,-90.0,-108.0,-18.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3972.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,7944.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
75%,11916.75,1.0,1.0,1.0,1.0,2.0,3.0,3.0,3.0,3.0,...,3.0,3.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
max,15889.0,4928.0,4287.0,4318.0,7924.0,6141.0,5531.0,4807.0,4900.0,5372.0,...,4872.0,4156.0,3823.0,3942.0,4450.0,4839.0,4751.0,4324.0,3953.0,3682.0


In [None]:
# Reload the raw (unscaled) sales table for post-processing. The working directory is
# Informer2020 at this point (after `%cd Informer2020`), so the data folder is one level up —
# the same place the sample submission is read from.
train_data = pd.read_csv('../Part2/train.csv').drop(columns=['ID', '제품'])

# Same outlier handling as in the preprocessing section, applied to the freshly loaded table.

# 1) Rows with zero sales over column positions 422..455 get those 34 columns replaced by the
#    row mean of the neighbouring reference columns.
ids = train_data.index[train_data.iloc[:,422:456].sum(axis=1).eq(0)]
around = [*range(422-7,422), *range(457,463)]
mean = train_data.iloc[ids,around].mean(axis=1)
mean = mean.values.reshape(-1,1)

replace_data = np.tile(mean, (1, 34))
train_data.iloc[ids,422:456] = replace_data

# 2) Cap each row at mean + 3*std of its non-zero sales.
m = train_data.iloc[:,4:].where(train_data.iloc[:,4:]!=0).mean(axis=1)
s = train_data.iloc[:,4:].where(train_data.iloc[:,4:]!=0).std(axis=1)

upper = m + 3*s

temp = train_data.iloc[:,4:].T
def replace(column,upper):
    """Return `column` with values above that product's upper bound replaced by the bound.

    The bound is looked up through the column's position in the module-level `temp` frame.
    """
    limit = upper[temp.columns.get_loc(column.name)]
    return np.where(column>limit, limit, column)

temp = temp.apply(replace, upper=upper)
train_data.iloc[:,4:] = temp.T

In [None]:
# Keep only the date columns.
train_data = train_data.iloc[:,4:]

# Row positions of products that never sold anything.
idx = []
for i in range(len(train_data)):
    if train_data.iloc[i,:].sum(axis=0) == 0:
        idx.append(i)

# Median of the non-zero sales per product: zeros become NaN so nanmedian ignores them.
# Never-sold products are reset to 0 afterwards so their median is 0 rather than NaN.
# (np.nan instead of np.NaN: the upper-case alias no longer exists in NumPy 2.)
train_data = train_data.replace(0, np.nan)
train_data.iloc[idx,:] = 0
data = np.array(train_data)
median = np.nanmedian(data, axis=1)

submit_temp = np.array(submit.iloc[:,1:])

# Any forecast below 1 is replaced by the product's median sale, but never by less than 1.
for row in range(len(submit_temp)):
    submit_temp[row,:] = np.where(submit_temp[row,:]<1, max(median[row],1), submit_temp[row,:])

submit.iloc[:,1:] = submit_temp

In [None]:
submit.describe()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
count,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,...,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0
mean,7944.5,15.482756,17.75236,18.022089,21.741221,15.580302,13.203587,12.258276,12.583575,12.949654,...,12.01051,11.777218,11.713971,12.419887,13.100126,13.159849,12.995721,12.659094,12.303084,12.524418
std,4587.192224,106.551789,120.021059,114.08531,252.159961,114.972335,95.591012,83.415568,84.931964,89.691141,...,79.588165,76.996726,75.930825,81.50743,90.219289,90.538385,87.811656,83.95836,79.986822,81.207072
min,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,3972.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
50%,7944.5,3.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
75%,11916.75,7.0,8.0,8.0,9.0,7.0,6.0,6.0,6.0,6.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
max,15889.0,6200.0,6200.0,5616.0,26082.0,6141.0,5531.0,4807.0,4900.0,5372.0,...,4872.0,4156.0,3823.0,3942.0,4450.0,4839.0,4751.0,4324.0,3953.0,3682.0


In [None]:
# Write the post-processed submission next to the project data (one level above Informer2020).
submit_path = 'submit_informer'
submit.to_csv(f'../submit_files/{submit_path}.csv', index=False)