## Import

In [1]:
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/LG_AIMERS

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/.shortcut-targets-by-id/1bo8OXmxCtxx_kqaMVbLxgeD7-6fO--3a/LG_AIMERS


In [2]:
import random
import os
# os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from Informer2020.utils.timefeatures import time_features
import warnings
warnings.filterwarnings('ignore')

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

## Hyperparameter Setting

In [4]:
CFG = {
    'seq_len':90, # 90일치로 학습
    'label_len':30, # 21일치 예측
    'pred_len':21,
    'EPOCHS':20,
    'LEARNING_RATE':1e-5,
    'BATCH_SIZE':2048,
    'SEED':41,
    'STRIDE':3,
    'checkpoint':'ckpt_sewoong_3',
    'timeenc':1
}

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed 고정

## 데이터 불러오기

In [6]:
train_data = pd.read_csv('./data/z_score/valid_replace_outlier_train.csv').drop(columns=['ID', '제품'])

In [7]:
brand = pd.read_csv('./data/z_score/brand_keyword_log.csv')

In [8]:
sales = pd.read_csv('./Part2/sales.csv')

In [9]:
train_diff = pd.read_csv('./data/z_score/train_diff.csv',index_col=0)

In [10]:
train_mean = pd.read_csv('./data/z_score/train_21_mean.csv')
train_mean = train_mean.T

In [11]:
train_mean.columns = train_data.columns[4:]

In [12]:
empty_brand = brand.set_index('브랜드').T.isnull().sum().sort_values(ascending=False)[:35].index

In [13]:
# periods = pd.to_datetime(train_data.columns[4:])
# common_info = pd.DataFrame({'weekday':periods.weekday.values},index = train_data.columns[4:])
# periods_data = pd.read_csv('./data/period.csv')
# periods_data['week'] = periods_data['day'].apply(cal_week)
# common_info['season'] = periods_data['season'].values
# common_info['week'] = periods_data['week'].values

In [14]:
empty_brand = brand.set_index('브랜드').T.isnull().sum().sort_values(ascending=False)[:35].index
col = brand.columns[1:]
mean = brand.iloc[:,1:].mean()
for b in empty_brand:
    brand.loc[brand['브랜드']==b,col] = mean.values

# brand = brand.fillna(0)

In [15]:
# change_cnts = pd.read_csv('./data/cost_std_and_changes.csv')
# change_cnts = change_cnts[['change_cnt']]

In [16]:
# cost = sales.iloc[:,6:] / train_data.iloc[:,4:]
# cost = cost.T
# cost.fillna(method='ffill',inplace=True)
# cost.fillna(method='bfill',inplace=True)

## 데이터 전처리

In [17]:
# max_value = change_cnts['change_cnt'].max()
# min_value = change_cnts['change_cnt'].min()
# diff = max_value - min_value

# change_cnts['change_cnt'] = (change_cnts['change_cnt'] - min_value) / diff

In [18]:
# numeric_cols = val_data.columns[4:]
# val_data[numeric_cols] = (val_data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)

In [19]:
# Label Encoding
label_encoder = LabelEncoder()
categorical_columns = ['대분류', '중분류', '소분류']

for col in categorical_columns:
    label_encoder.fit(train_data[col])
    train_data[col] = label_encoder.transform(train_data[col])

label_encoder.fit(brand['브랜드'])
train_data['브랜드'] = label_encoder.transform(train_data['브랜드'])
brand['브랜드'] = label_encoder.transform(brand['브랜드'])

In [20]:
num_train = int(len(train_data.columns[4:])*0.8)
num_vali = len(train_data.columns[4:]) - num_train
border1s = [0, num_train-CFG['seq_len']]
border2s = [num_train, num_train+num_vali]
set_type = {'train':0,'valid':1}

In [21]:
total_scale = True
# 숫자형 변수들의 min-max scaling을 수행하는 코드입니다.
numeric_cols = train_data.columns[4:]
# 칵 column의 min 및 max 계산

if total_scale:
  min_values = train_data[numeric_cols].min(axis=1)
  max_values = train_data[numeric_cols].max(axis=1)
else:
  min_values = train_data[numeric_cols[border1s[0]:border2s[0]]].min(axis=1)
  max_values = train_data[numeric_cols[border1s[0]:border2s[0]]].max(axis=1)
# 각 행의 범위(max-min)를 계산하고, 범위가 0인 경우 1로 대체
ranges = max_values - min_values
ranges[ranges == 0] = 1  ##기존 코드에서는 0으로 처리함
# min-max scaling 수행
train_data[numeric_cols] = (train_data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)
# max와 min 값을 dictionary 형태로 저장
scale_min_dict = min_values.to_dict()
scale_max_dict = max_values.to_dict()

In [22]:
train_mean.index = min_values.index
train_mean[numeric_cols] = (train_mean[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)

In [23]:
val_data = train_data.iloc[:,4 + border1s[1]:]
val_data = pd.concat([train_data.iloc[:,:4],val_data],axis=1)
train_data = train_data.iloc[:, :4 + border2s[0]]

## make dataset

In [24]:
def make_data(data, seq_len=CFG['seq_len'], pred_len=CFG['pred_len'], label_len=CFG['label_len'], stride=CFG['STRIDE']):

    date = pd.DataFrame({'date':pd.to_datetime(data.columns[4:])})
    data_stamp = time_features(date, timeenc=CFG['timeenc'], freq='d') ## time_enc=0 : month,day,weekday / time_enc=1 : month,dasy,weekday를 encoding
    num_rows = len(data) #train의 행 => 제품개수
    window_size = seq_len + pred_len

    enc_input_data = np.empty((num_rows * len(range(0,len(data.columns) - 4 - window_size + 1,stride)), seq_len, 4))
    dec_input_data = np.empty((num_rows * len(range(0,len(data.columns) - 4 - window_size + 1,stride)), label_len + pred_len, 4))

    enc_mark_data = np.empty((num_rows * len(range(0,len(data.columns) - 4 - window_size + 1,stride)), seq_len, data_stamp.shape[1]+4))
    dec_mark_data = np.empty((num_rows * len(range(0,len(data.columns) - 4 - window_size + 1,stride)), label_len + pred_len, data_stamp.shape[1]+4))

    for i in tqdm(range(num_rows)): #각 ID 마다
        encode_info = np.array(data.iloc[i, :4]) #분류 정보
        sales_data = np.array(data.iloc[i, 4:]) #판매 정보
        temp_brand = brand.loc[brand['브랜드']==encode_info[-1],data.columns[4:]].values[0]
        t_diff = np.array(train_diff.iloc[i,4:])
        t_mean = np.array(train_mean.iloc[i,:])

        assert len(temp_brand)==len(sales_data)

        for idx,j in enumerate(range(0,len(sales_data) - window_size + 1, stride)):

            s_begin = j
            s_end = s_begin + seq_len
            r_begin = s_end - label_len
            r_end = r_begin + label_len + pred_len

            enc_mark = data_stamp[s_begin:s_end]
            dec_mark = data_stamp[r_begin:r_end]

            enc_info = np.stack([encode_info for _ in range(s_end-s_begin)])
            dec_info = np.stack([encode_info for _ in range(r_end-r_begin)])

            enc_mark = np.concatenate([enc_mark,enc_info],axis=1)
            dec_mark = np.concatenate([dec_mark,dec_info],axis=1)

            brand_window = temp_brand[j : j + window_size]
            sales_window = sales_data[j : j + window_size]
            t_diff_window = t_diff[j : j + window_size]
            t_mean_window = t_mean[j : j + window_size]

            # enc_temp_data = np.column_stack((np.tile(encode_info, (seq_len, 1)),
            #                              np.tile(changes, (seq_len, 1)),
            #                              brand_window[:seq_len],
            #                              sales_window[:seq_len]))

            enc_temp_data = np.column_stack((brand_window[:seq_len],
                                             t_diff_window[:seq_len],
                                             t_mean_window[:seq_len],
                                             sales_window[:seq_len]))

            # dec_temp_data = np.column_stack((np.tile(encode_info, (label_len+pred_len, 1)),
            #                              np.tile(changes, (label_len+pred_len, 1)),
            #                              brand_window[-(label_len+pred_len):],
            #                              sales_window[-(label_len+pred_len):]))

            dec_temp_data = np.column_stack((brand_window[-(label_len+pred_len):],
                                             t_diff_window[-(label_len+pred_len):],
                                             t_mean_window[-(label_len+pred_len):],
                                             sales_window[-(label_len+pred_len):]))

            enc_input_data[i * len(range(0,len(data.columns) - 4 - window_size + 1,stride)) + idx] = enc_temp_data
            dec_input_data[i * len(range(0,len(data.columns) - 4 - window_size + 1,stride)) + idx] = dec_temp_data

            enc_mark_data[i] = enc_mark
            dec_mark_data[i] = dec_mark

    return enc_input_data, dec_input_data, enc_mark_data, dec_mark_data

In [25]:
def make_predict_data(data, seq_len=CFG['seq_len'], pred_len=CFG['pred_len'], label_len=CFG['label_len']):

    date = pd.DataFrame({'date':pd.to_datetime(data.columns[-seq_len:])})
    pred_dates = pd.date_range(date.date.values[-1], periods=pred_len+1, freq='d')
    df_stamp = pd.DataFrame(columns = ['date'])
    df_stamp.date = list(date.date.values) + list(pred_dates[1:])
    data_stamp = time_features(df_stamp, timeenc=CFG['timeenc'], freq='d')

    num_rows = len(data)
    window_size = seq_len + pred_len

    enc_input_data = np.empty((num_rows, seq_len, 4))
    dec_input_data = np.empty((num_rows, label_len, 4))

    # enc_mark_data = np.array([data_stamp[:seq_len] for _ in range(num_rows)])
    # dec_mark_data = np.array([data_stamp[seq_len-label_len:] for _ in range(num_rows)])
    enc_mark_data = np.empty((num_rows, seq_len, data_stamp.shape[1] + 4))
    dec_mark_data = np.empty((num_rows, label_len + pred_len, data_stamp.shape[1] + 4))

    enc_mark = data_stamp[:seq_len]
    dec_mark = data_stamp[seq_len-label_len:]

    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, -seq_len:])
        temp_brand = brand[brand['브랜드']==encode_info[-1]].values[0][-seq_len:]
        temp_t_diff = np.array(train_diff.iloc[i,-seq_len:])
        temp_t_mean = np.array(train_mean.iloc[i,-seq_len:])
        # changes = np.array(change_cnts.iloc[i,:])

        enc_info = np.stack([encode_info for _ in range(seq_len)])
        dec_info = np.stack([encode_info for _ in range(label_len+pred_len)])

        enc_mark_temp = np.concatenate([enc_mark,enc_info],axis=1)
        dec_mark_temp = np.concatenate([dec_mark,dec_info],axis=1)

        sales_window = sales_data[-seq_len : ]
        brand_window = temp_brand[-seq_len : ]
        t_diff_window = temp_t_diff[-seq_len : ]
        t_mean_window = temp_t_mean[-seq_len : ]

        # enc_temp_data = np.column_stack((np.tile(encode_info, (seq_len, 1)),
        #                                  np.tile(changes, (seq_len, 1)),
        #                                  brand_window[:seq_len],
        #                                  sales_window[:seq_len]))

        enc_temp_data = np.column_stack((brand_window[:seq_len],
                                         t_diff_window[:seq_len],
                                         t_mean_window[:seq_len],
                                         sales_window[:seq_len]))

        # dec_temp_data = np.column_stack((np.tile(encode_info, (label_len, 1)),
        #                                  np.tile(changes, (label_len, 1)),
        #                                  brand_window[-label_len:],
        #                                  sales_window[-label_len:]))

        dec_temp_data = np.column_stack((brand_window[-label_len:],
                                         t_diff_window[-label_len:],
                                         t_mean_window[-label_len:],
                                         sales_window[-label_len:]))

        enc_input_data[i] = enc_temp_data
        dec_input_data[i] = dec_temp_data

        enc_mark_data[i] = enc_mark_temp
        dec_mark_data[i] = dec_mark_temp

    return enc_input_data, dec_input_data, enc_mark_data, dec_mark_data

In [26]:
def make_psfa_data(data, seq_len=CFG['seq_len'], pred_len=CFG['pred_len'], label_len=CFG['label_len']):

    num_rows = len(data) #train의 행 => 제품개수
    window_size = seq_len + pred_len

    date = pd.DataFrame({'date':pd.to_datetime(data.columns[4:])})
    data_stamp = time_features(date, timeenc=CFG['timeenc'], freq='d') ## time_enc=0 : month,day,weekday / time_enc=1 : month,dasy,weekday를 encoding
    data_stamp = data_stamp[-window_size:,:]

    enc_input_data = np.empty((num_rows, seq_len, 4))
    dec_input_data = np.empty((num_rows, label_len + pred_len, 4))

    # enc_mark_data = np.array([data_stamp[:seq_len] for _ in range(num_rows)])
    # dec_mark_data = np.array([data_stamp[-(label_len+pred_len):] for _ in range(num_rows)])
    enc_mark_data = np.empty((num_rows, seq_len, data_stamp.shape[1] + 4))
    dec_mark_data = np.empty((num_rows, label_len + pred_len, data_stamp.shape[1] + 4))

    enc_mark = data_stamp[:seq_len]
    dec_mark = data_stamp[seq_len-label_len:]

    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, -window_size:])
        temp_brand = brand[brand['브랜드']==encode_info[-1]].values[0][-window_size:]
        temp_t_diff = np.array(train_diff.iloc[i, -window_size:])
        temp_t_mean = np.array(train_mean.iloc[i, -window_size:])
        # changes = np.array(change_cnts.iloc[i,:])

        sales_window = sales_data[-window_size : ]
        brand_window = temp_brand[-window_size : ]
        t_diff_window = temp_t_diff[-window_size : ]
        t_mean_window = temp_t_mean[-window_size : ]

        enc_info = np.stack([encode_info for _ in range(seq_len)])
        dec_info = np.stack([encode_info for _ in range(label_len+pred_len)])

        enc_mark_temp = np.concatenate([enc_mark,enc_info],axis=1)
        dec_mark_temp = np.concatenate([dec_mark,dec_info],axis=1)

        # enc_temp_data = np.column_stack((np.tile(encode_info, (seq_len, 1)),
        #                                  np.tile(changes, (seq_len, 1)),
        #                                  brand_window[:seq_len],
        #                                  sales_window[:seq_len]))

        enc_temp_data = np.column_stack((brand_window[:seq_len],
                                         t_diff_window[:seq_len],
                                         t_mean_window[:seq_len],
                                         sales_window[:seq_len]))

        # dec_temp_data = np.column_stack((np.tile(encode_info, (label_len+pred_len, 1)),
        #                                  np.tile(changes, (label_len+pred_len, 1)),
        #                                  brand_window[-(label_len+pred_len):],
        #                                  sales_window[-(label_len+pred_len):]))

        dec_temp_data = np.column_stack((brand_window[-(label_len+pred_len):],
                                         t_diff_window[-(label_len+pred_len):],
                                         t_mean_window[-(label_len+pred_len):],
                                         sales_window[-(label_len+pred_len):]))

        enc_input_data[i] = enc_temp_data
        dec_input_data[i] = dec_temp_data

        enc_mark_data[i] = enc_mark_temp
        dec_mark_data[i] = dec_mark_temp

    return enc_input_data, dec_input_data, enc_mark_data, dec_mark_data

In [27]:
enc_train_data, dec_train_data, enc_mark_train_data, dec_mark_train_data = make_data(train_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

In [28]:
enc_train_data.shape, dec_train_data.shape, enc_mark_train_data.shape, dec_mark_train_data.shape

((1366540, 90, 4), (1366540, 51, 4), (1366540, 90, 9), (1366540, 51, 9))

In [29]:
enc_val_data, dec_val_data, enc_mark_val_data, dec_mark_val_data = make_data(val_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

In [30]:
enc_val_data.shape, dec_val_data.shape, enc_mark_val_data.shape, dec_mark_val_data.shape

((381360, 90, 4), (381360, 51, 4), (381360, 90, 9), (381360, 51, 9))

In [31]:
enc_test_data, dec_test_data, enc_mark_test_data, dec_mark_test_data = make_predict_data(val_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

In [32]:
enc_test_data.shape, dec_test_data.shape, enc_mark_test_data.shape, dec_mark_test_data.shape

((15890, 90, 4), (15890, 30, 4), (15890, 90, 9), (15890, 51, 9))

In [33]:
enc_psfa_data, dec_psfa_data, enc_mark_psfa_data, dec_mark_psfa_data = make_psfa_data(val_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

In [34]:
enc_psfa_data.shape, dec_psfa_data.shape, enc_mark_psfa_data.shape, dec_mark_psfa_data.shape

((15890, 90, 4), (15890, 51, 4), (15890, 90, 9), (15890, 51, 9))

## PSFA

In [35]:
indexs_bigcat={}
for bigcat in train_data['대분류'].unique():
    indexs_bigcat[bigcat] = list(train_data.loc[train_data['대분류']==bigcat].index)

indexs_bigcat.keys()

## 대분류별로 15890개 item의 index

dict_keys([1, 2, 0, 4, 3])

In [36]:
##item별로 딱 한번만 예측해서 15890개만 존재할때 사용

def PSFA(pred, target):
    PSFA = 1
    for cat in range(5):
        ids = indexs_bigcat[cat]
        for day in range(CFG['pred_len']):
            total_sell = np.sum(target[ids, day]) # day별 총 판매량
            pred_values = pred[ids, day] # day별 예측 판매량
            target_values = target[ids, day] # day별 실제 판매량

            # 실제 판매와 예측 판매가 같은 경우 오차가 없는 것으로 간주
            denominator = np.maximum(target_values, pred_values)
            diffs = np.where(denominator!=0, np.abs(target_values - pred_values) / denominator, 0)

            if total_sell != 0:
                sell_weights = target_values / total_sell  # Item별 day 총 판매량 내 비중
            else:
                sell_weights = np.ones_like(target_values) / len(ids)  # 1 / len(ids)로 대체

            if not np.isnan(diffs).any():  # diffs에 NaN이 없는 경우에만 PSFA 값 업데이트
                PSFA -= np.sum(diffs * sell_weights) / (CFG['pred_len'] * 5)


    return PSFA

## Custom Dataset

In [37]:
%cd Informer2020

/content/drive/.shortcut-targets-by-id/1bo8OXmxCtxx_kqaMVbLxgeD7-6fO--3a/LG_AIMERS/Informer2020


In [38]:
from data.data_loader import Dataset_Custom

In [39]:
train_dataset = Dataset_Custom(enc_train_data, dec_train_data, enc_mark_train_data, dec_mark_train_data)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = Dataset_Custom(enc_val_data, dec_val_data, enc_mark_val_data, dec_mark_val_data)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

psfa_dataset = Dataset_Custom(enc_psfa_data, dec_psfa_data, enc_mark_psfa_data, dec_mark_psfa_data)
psfa_loader = DataLoader(psfa_dataset, batch_size = 512, shuffle=False, num_workers=0)

test_dataset = Dataset_Custom(enc_test_data, dec_test_data, enc_mark_test_data, dec_mark_test_data)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## 모델 선언

#### parameter

In [40]:
input_size = enc_test_data.shape[-1]
input_size

4

In [41]:
enc_in = input_size
dec_in = input_size
c_out = 1
seq_len = CFG['seq_len']
label_len = CFG['label_len']
out_len = CFG['pred_len']
output_attention = False
freq = 'd'
embed = 'timeF'
dropout = 0.1
d_model=512
attn = 'full'
device = device

learning_rate = CFG['LEARNING_RATE']

In [42]:
from models.model import Informer

model = Informer(enc_in=enc_in, dec_in=dec_in, c_out=c_out, seq_len=seq_len, label_len=label_len, out_len=out_len, d_model=d_model, attn=attn, output_attention=output_attention, freq=freq, embed=embed, dropout=dropout, device=device)

## 모델 학습

In [43]:
def process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device):
  batch_x = batch_x.float().to(device)
  batch_y = batch_y.float()

  batch_x_mark = batch_x_mark.float().to(device)
  batch_y_mark = batch_y_mark.float().to(device)

  # decoder input
  dec_inp = torch.zeros([batch_y.shape[0], CFG['pred_len'], batch_y.shape[-1]]).float()
  # dec_inp = torch.ones([batch_y.shape[0], self.args.pred_len, batch_y.shape[-1]]).float()
  dec_inp = torch.cat([batch_y[:,:CFG['label_len'],:], dec_inp], dim=1).float().to(device)
  # encoder - decoder
  outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

  f_dim = -1 #feature = 'MS'
  batch_y = batch_y[:,-CFG['pred_len']:,f_dim:].to(device)

  return outputs, batch_y

def adjust_learning_rate(optimizer, epoch):
  # lr = args.learning_rate * (0.2 ** (epoch // 2))
  lr_adjust = {epoch: CFG['LEARNING_RATE'] * (0.5 ** ((epoch-1) // 1))}

  if epoch in lr_adjust.keys():
      lr = lr_adjust[epoch]
      for param_group in optimizer.param_groups:
          param_group['lr'] = lr
      print('Updating learning rate to {}'.format(lr))

In [44]:
location = CFG['checkpoint']

In [45]:
import time
from tqdm import tqdm

def get_psfa(model, psfa_loader, device):
  psfa_pred_list = []
  psfa_target_list = []

  with torch.no_grad():
    for batch_x,batch_y,batch_x_mark,batch_y_mark in tqdm(iter(psfa_loader)):
      pred, true = process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device)
      pred = pred.cpu().numpy()
      true = true.cpu().numpy()

      psfa_pred_list.extend(pred)
      psfa_target_list.extend(true)

  psfa_pred_list = np.array(psfa_pred_list)
  psfa_target_list = np.array(psfa_target_list)

  for idx in range(len(psfa_pred_list)): #15889
    psfa_pred_list[idx, :] = psfa_pred_list[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]
    psfa_target_list[idx, :] = psfa_target_list[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]

  psfa_pred_list = np.round(psfa_pred_list, 0).astype(int)
  psfa_target_list = np.round(psfa_target_list, 0).astype(int)

  psfa_pred_list = psfa_pred_list.squeeze(-1)
  psfa_target_list = psfa_target_list.squeeze(-1)

  return PSFA(psfa_pred_list,psfa_target_list)


def vali(model, val_loader, psfa_loader, criterion, optimizer, device):
  model.eval()
  total_loss = []
  with torch.no_grad():
    for batch_x,batch_y,batch_x_mark,batch_y_mark in tqdm(iter(val_loader)):
        pred, true = process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device)
        loss = criterion(pred.detach().cpu(), true.detach().cpu())
        total_loss.append(loss)

  total_loss = np.average(total_loss)
  psfa_score = get_psfa(model, psfa_loader, device)
  return total_loss, psfa_score

def train(model, train_loader, val_loader, psfa_loader, optimizer, criterion, device):
  best_loss = 9999999
  best_psfa = 0
  best_psfa_model = None
  best_loss_model = None
  patient = 0
  train_loss_list = []
  val_loss_list = []
  val_psfa_list = []
  model = model.to(device)

  path = os.path.join('./checkpoint', location)
  if not os.path.exists(path):
      os.makedirs(path)

  train_steps = len(train_loader)

  for epoch in range(CFG['EPOCHS']):
      iter_count = 0
      train_loss = []
      model.train()

      for batch_x,batch_y,batch_x_mark,batch_y_mark in tqdm(iter(train_loader)):

          optimizer.zero_grad()
          pred, true = process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device)
          loss = criterion(pred, true)
          train_loss.append(loss.item())

          loss.backward()
          optimizer.step()

      train_loss = np.average(train_loss)
      val_loss, val_psfa = vali(model, val_loader, psfa_loader, criterion, optimizer, device)

      train_loss_list.append(train_loss)
      val_loss_list.append(val_loss)
      val_psfa_list.append(val_psfa)

      print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Val Loss: {3:.7f} Val PSFA: {4:.7f}".format(epoch + 1, train_steps, train_loss, val_loss, val_psfa))

      if best_loss > val_loss:
          best_loss = val_loss
          best_loss_model = model
          torch.save(model.state_dict(), path+'/'+'ckpt_loss.pth')
          patient = 0
          print('Loss Model Saved')

      if best_psfa < val_psfa:
          best_psfa = val_psfa
          best_psfa_model = model
          torch.save(model.state_dict(), path+'/'+'ckpt_psfa.pth')
          patient = 0
          print('PSFA Model Saved')

      #adjust_learning_rate(optimizer, epoch+1)

  best_model_path = path+'/'+'ckpt_psfa.pth'
  #model.load_state_dict(torch.load(best_model_path))

  return best_psfa_model, train_loss_list, val_loss_list, val_psfa_list

## Run !!

In [140]:
#0828
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
criterion = nn.MSELoss()
infer_model, train_loss_list, val_loss_list, val_psfa_list = train(model, train_loader, val_loader, psfa_loader, optimizer, criterion, device)

100%|██████████| 668/668 [08:55<00:00,  1.25it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.50it/s]


Epoch: 1, Steps: 668 | Train Loss: 0.0336782 Val Loss: 0.0221227 Val PSFA: 0.6581801
Loss Model Saved
PSFA Model Saved


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.71it/s]


Epoch: 2, Steps: 668 | Train Loss: 0.0257197 Val Loss: 0.0227170 Val PSFA: 0.6770215
PSFA Model Saved


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.48it/s]
100%|██████████| 32/32 [00:02<00:00, 13.72it/s]


Epoch: 3, Steps: 668 | Train Loss: 0.0252535 Val Loss: 0.0206203 Val PSFA: 0.6907533
Loss Model Saved
PSFA Model Saved


100%|██████████| 668/668 [08:47<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.48it/s]
100%|██████████| 32/32 [00:02<00:00, 13.67it/s]


Epoch: 4, Steps: 668 | Train Loss: 0.0250252 Val Loss: 0.0200775 Val PSFA: 0.7014570
Loss Model Saved
PSFA Model Saved


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.49it/s]
100%|██████████| 32/32 [00:02<00:00, 13.67it/s]


Epoch: 5, Steps: 668 | Train Loss: 0.0248199 Val Loss: 0.0202820 Val PSFA: 0.6843004


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.48it/s]
100%|██████████| 32/32 [00:02<00:00, 13.50it/s]


Epoch: 6, Steps: 668 | Train Loss: 0.0246180 Val Loss: 0.0208427 Val PSFA: 0.6836201


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.48it/s]
100%|██████████| 32/32 [00:02<00:00, 13.72it/s]


Epoch: 7, Steps: 668 | Train Loss: 0.0245256 Val Loss: 0.0225725 Val PSFA: 0.6700682


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.48it/s]
100%|██████████| 32/32 [00:02<00:00, 13.68it/s]


Epoch: 8, Steps: 668 | Train Loss: 0.0243974 Val Loss: 0.0224695 Val PSFA: 0.6885440


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.48it/s]
100%|██████████| 32/32 [00:02<00:00, 13.66it/s]


Epoch: 9, Steps: 668 | Train Loss: 0.0243348 Val Loss: 0.0208684 Val PSFA: 0.6642716


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.48it/s]
100%|██████████| 32/32 [00:02<00:00, 13.64it/s]


Epoch: 10, Steps: 668 | Train Loss: 0.0242499 Val Loss: 0.0208529 Val PSFA: 0.6827042


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.65it/s]


Epoch: 11, Steps: 668 | Train Loss: 0.0241867 Val Loss: 0.0207169 Val PSFA: 0.6785673


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.56it/s]


Epoch: 12, Steps: 668 | Train Loss: 0.0241114 Val Loss: 0.0210812 Val PSFA: 0.6751265


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.68it/s]


Epoch: 13, Steps: 668 | Train Loss: 0.0240484 Val Loss: 0.0215519 Val PSFA: 0.6660266


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.67it/s]


Epoch: 14, Steps: 668 | Train Loss: 0.0239909 Val Loss: 0.0199687 Val PSFA: 0.6678448
Loss Model Saved


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.68it/s]


Epoch: 15, Steps: 668 | Train Loss: 0.0239179 Val Loss: 0.0215837 Val PSFA: 0.6522957


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.61it/s]


Epoch: 16, Steps: 668 | Train Loss: 0.0238412 Val Loss: 0.0217589 Val PSFA: 0.6575720


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.65it/s]


Epoch: 17, Steps: 668 | Train Loss: 0.0237652 Val Loss: 0.0214819 Val PSFA: 0.6527450


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.67it/s]


Epoch: 18, Steps: 668 | Train Loss: 0.0236567 Val Loss: 0.0204101 Val PSFA: 0.6594330


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.74it/s]


Epoch: 19, Steps: 668 | Train Loss: 0.0235537 Val Loss: 0.0202366 Val PSFA: 0.6410267


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.47it/s]
100%|██████████| 32/32 [00:02<00:00, 13.73it/s]


Epoch: 20, Steps: 668 | Train Loss: 0.0233998 Val Loss: 0.0210571 Val PSFA: 0.6467394


In [None]:
#0828_2
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
criterion = nn.MSELoss()
infer_model, train_loss_list, val_loss_list, val_psfa_list = train(model, train_loader, val_loader, psfa_loader, optimizer, criterion, device)

100%|██████████| 668/668 [08:48<00:00,  1.26it/s]
100%|██████████| 187/187 [00:53<00:00,  3.49it/s]
100%|██████████| 32/32 [00:02<00:00, 13.53it/s]


Epoch: 1, Steps: 668 | Train Loss: 0.0357827 Val Loss: 0.0253793 Val PSFA: 0.6431884
Loss Model Saved
PSFA Model Saved


100%|██████████| 668/668 [08:46<00:00,  1.27it/s]
100%|██████████| 187/187 [00:53<00:00,  3.48it/s]
100%|██████████| 32/32 [00:02<00:00, 13.68it/s]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Epoch: 2, Steps: 668 | Train Loss: 0.0273339 Val Loss: 0.0224070 Val PSFA: 0.6683631
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-46-afbbdd20781a>", line 4, in <cell line: 4>
    infer_model, train_loss_list, val_loss_list, val_psfa_list = train(model, train_loader, val_loader, psfa_loader, optimizer, criterion, device)
  File "<ipython-input-45-6e17776126e4>", line 90, in train
    torch.save(model.state_dict(), path+'/'+'ckpt_loss.pth')
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 440, in save
    with _open_zipfile_writer(f) as opened_zipfile:
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 315, in _open_zipfile_writer
    return container(name_or_buffer)
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 288, in __init__
  

## 모델 추론

In [46]:
def inference(model, test_loader, device):
  model = model.to(device)
  model.eval()
  predictions = []

  with torch.no_grad():
    for batch_x,batch_y,batch_x_mark,batch_y_mark in tqdm(iter(test_loader)):
        pred, true = process_one_batch(model, batch_x, batch_y, batch_x_mark, batch_y_mark, device)
        pred = pred.detach().cpu().numpy()

        predictions.extend(pred)

  return np.array(predictions)

In [None]:
pred = inference(infer_model, test_loader, device)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-3c242c8ed6e4>", line 1, in <cell line: 1>
    pred = inference(infer_model, test_loader, device)
NameError: name 'infer_model' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/usr/local/lib/python

In [None]:
pred = pred.squeeze(-1)
pred.shape

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-4a30d692f618>", line 1, in <cell line: 1>
    pred = pred.squeeze(-1)
NameError: name 'pred' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/ul

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-4a30d692f618>", line 1, in <cell line: 1>
    pred = pred.squeeze(-1)
NameError: name 'pred' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactive

In [None]:
# 추론 결과를 inverse scaling
for idx in range(len(pred)):
    pred[idx, :] = pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]

# 결과 후처리
pred = np.round(pred, 0).astype(int)

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-24594494eaaf>", line 2, in <cell line: 2>
    for idx in range(len(pred)):
NameError: name 'pred' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/usr/local/lib/python3.10/dist-packages/IPython/co

## Submission

In [None]:
submit = pd.read_csv('../Part2/sample_submission.csv')

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-2048158461ca>", line 1, in <cell line: 1>
    submit = pd.read_csv('../Part2/sample_submission.csv')
  File "/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py", line 331, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 950, in read_csv
    return _read(filepath_or_buffer, kwds)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 605, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py", line 1442, in __init__
    self._engine

In [None]:
submit.iloc[:,1:] = pred

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-e450071caa6c>", line 1, in <cell line: 1>
    submit.iloc[:,1:] = pred
NameError: name 'pred' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/u

In [None]:
submit.describe()

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-355fe73e6f53>", line 1, in <cell line: 1>
    submit.describe()
NameError: name 'submit' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/ultrat

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-355fe73e6f53>", line 1, in <cell line: 1>
    submit.describe()
NameError: name 'submit' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshel

In [None]:
# train = pd.read_csv('../Part2/train.csv').drop(columns=['ID', '제품'])
# med = train.iloc[:,-7:].median(axis=1)
# submit_temp = np.array(submit.iloc[:,1:])

# for idx in range(len(submit_temp)):
#     submit_temp[idx,:] = np.where(submit_temp[idx,:]<1, max(med[idx],1), submit_temp[idx,:])

# submit.iloc[:,1:] = submit_temp

In [148]:
train = pd.read_csv('../data/z_score/valid_replace_outlier_train.csv').drop(columns=['ID', '제품'])
train = train.iloc[:,4:]

idx = []
for i in range(len(train)):
    if train.iloc[i,:].sum(axis=0) == 0:
        idx.append(i)

train = train.replace(0, np.NaN)
train.iloc[idx,:] = 0
data = np.array(train)
median = np.nanmedian(data, axis=1)

submit_temp = np.array(submit.iloc[:,1:])

for idx in range(len(submit_temp)):
    submit_temp[idx,:] = np.where(submit_temp[idx,:]<1, max(median[idx],1), submit_temp[idx,:])

submit.iloc[:,1:] = submit_temp

In [149]:
submit.describe()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
count,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,...,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0,15890.0
mean,7944.5,16.600692,17.558842,17.114223,16.049654,14.741284,13.536564,13.892763,15.170988,16.656828,...,15.024858,14.037193,14.451857,16.102077,17.431403,17.998112,16.997042,15.786281,15.357269,15.571429
std,4587.192224,119.887295,125.715109,114.048697,104.801548,91.16134,79.256733,82.906162,95.055943,109.558957,...,92.236076,84.56369,88.000989,103.989243,116.304446,121.005404,111.320278,98.93928,94.629418,96.973329
min,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,3972.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
50%,7944.5,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
75%,11916.75,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,...,6.0,6.0,6.0,6.0,6.0,7.0,6.0,6.0,6.0,6.0
max,15889.0,5543.0,5380.0,5038.0,4629.0,3733.0,3009.0,3190.0,3912.0,4844.0,...,3654.0,3376.0,3570.0,4557.0,5355.0,5648.0,5074.0,4191.0,3973.0,4161.0


In [150]:
submit_path = 'submit_informer_sewoong_0828'
submit.to_csv(f'../submit_files/{submit_path}.csv', index=False)