# 라이브러리 로딩

In [None]:
# 개발환경 OS : WINDOWS 10, Anaconda

# 라이브러리 버전 정보
# scikit-learn==1.2.2
# pandas==1.3.5
# numpy==1.20.3
# sklearn==0.0
# xgboost==1.7.2
# catboost==1.0.3
# lightgbm==3.3.1
# torch :  1.13.1+cpu

In [1]:
import pandas as pd
import numpy as np

import sys
import os
import random
from pathlib import Path
import copy
import warnings
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.preprocessing import LabelEncoder

import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
from tqdm import tqdm

import copy
from pathlib import Path
import warnings

In [2]:
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from catboost import CatBoostRegressor
import lightgbm as lgb

In [None]:
import torch
from torch.utils.data import TensorDataset # 텐서데이터셋
from torch.utils.data import DataLoader # 데이터로더
from torch import nn
from torch import optim

In [None]:
!pip install holidays
import holidays

In [3]:
# 적당한 크기로 설정
FONT_SMALL = 9
FONT_MIDIUM = 12
FONT_LARGE = 22

# 플롯 크기 기본 설정
plt.rcParams['figure.figsize'] = (12, 5)

plt.rcParams['figure.autolayout'] = False
plt.rcParams['figure.figsize'] = (16, 8)
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['axes.titleweight'] = 'bold'
plt.rcParams['font.size'] = 16
plt.rcParams['lines.linewidth'] = 2.0
plt.rcParams['lines.markersize'] = 8
plt.rcParams['legend.fontsize'] = 14

In [4]:
def seed_all(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True  # type: ignore    

seed_all(42)

In [122]:
path='./data/'

# DLinear 모델링

In [123]:
train_data = pd.read_csv(path+'train.csv')
train_data = train_data.rename(columns={'supply(kg)':'supply', 'price(원/kg)':'price'})
train_data

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0
...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0


In [124]:
test_data = pd.read_csv(path+'test.csv')
test_data = test_data.rename(columns={'supply(kg)':'supply', 'price(원/kg)':'price'})
test_data

Unnamed: 0,ID,timestamp,item,corporation,location
0,TG_A_J_20230304,2023-03-04,TG,A,J
1,TG_A_J_20230305,2023-03-05,TG,A,J
2,TG_A_J_20230306,2023-03-06,TG,A,J
3,TG_A_J_20230307,2023-03-07,TG,A,J
4,TG_A_J_20230308,2023-03-08,TG,A,J
...,...,...,...,...,...
1087,RD_F_J_20230327,2023-03-27,RD,F,J
1088,RD_F_J_20230328,2023-03-28,RD,F,J
1089,RD_F_J_20230329,2023-03-29,RD,F,J
1090,RD_F_J_20230330,2023-03-30,RD,F,J


In [9]:
train_data['ts'] = train_data.apply(lambda x : pd.Timestamp(year=int(x.timestamp[:4]), month=int(x.timestamp[5:7]), day=int(x.timestamp[8:10])),axis=1)
train_data['weekday'] = train_data['ts'].dt.weekday
train_data = train_data.drop('ts',axis=1)

In [10]:
test_data['ts'] = test_data.apply(lambda x : pd.Timestamp(year=int(x.timestamp[:4]), month=int(x.timestamp[5:7]), day=int(x.timestamp[8:10])),axis=1)
test_data['weekday'] = test_data['ts'].dt.weekday
test_data = test_data.drop('ts',axis=1)

In [11]:
for i in ['item','corporation','location']:
    le = LabelEncoder()
    le=le.fit(train_data[i])
    train_data[i]=le.transform(train_data[i])

    for case in np.unique(test_data[i]):
        if case not in le.classes_:
            le.classes_ = np.append(le.classes_, case)
    test_data[i]=le.transform(test_data[i])

display(train_data.head())
display(test_data.head())

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,weekday
0,TG_A_J_20190101,2019-01-01,4,0,0,0.0,0.0,1
1,TG_A_J_20190102,2019-01-02,4,0,0,0.0,0.0,2
2,TG_A_J_20190103,2019-01-03,4,0,0,60601.0,1728.0,3
3,TG_A_J_20190104,2019-01-04,4,0,0,25000.0,1408.0,4
4,TG_A_J_20190105,2019-01-05,4,0,0,32352.0,1250.0,5


Unnamed: 0,ID,timestamp,item,corporation,location,weekday
0,TG_A_J_20230304,2023-03-04,4,0,0,5
1,TG_A_J_20230305,2023-03-05,4,0,0,6
2,TG_A_J_20230306,2023-03-06,4,0,0,0
3,TG_A_J_20230307,2023-03-07,4,0,0,1
4,TG_A_J_20230308,2023-03-08,4,0,0,2


In [12]:
ddict = {}
for i,x in enumerate(train_data['timestamp'].unique()):
  ddict[x] = i

train_data['timestamp'] = train_data['timestamp'].map(ddict)
train_data

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,weekday
0,TG_A_J_20190101,0,4,0,0,0.0,0.0,1
1,TG_A_J_20190102,1,4,0,0,0.0,0.0,2
2,TG_A_J_20190103,2,4,0,0,60601.0,1728.0,3
3,TG_A_J_20190104,3,4,0,0,25000.0,1408.0,4
4,TG_A_J_20190105,4,4,0,0,32352.0,1250.0,5
...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,1518,3,5,0,452440.0,468.0,0
59393,RD_F_J_20230228,1519,3,5,0,421980.0,531.0,1
59394,RD_F_J_20230301,1520,3,5,0,382980.0,574.0,2
59395,RD_F_J_20230302,1521,3,5,0,477220.0,523.0,3


In [13]:
ddict = {}
start_ = train_data['timestamp'].max() + 1
for i,x in enumerate(test_data['timestamp'].unique()):
  ddict[x] = start_ + i

test_data['timestamp'] = test_data['timestamp'].map(ddict)
test_data

Unnamed: 0,ID,timestamp,item,corporation,location,weekday
0,TG_A_J_20230304,1523,4,0,0,5
1,TG_A_J_20230305,1524,4,0,0,6
2,TG_A_J_20230306,1525,4,0,0,0
3,TG_A_J_20230307,1526,4,0,0,1
4,TG_A_J_20230308,1527,4,0,0,2
...,...,...,...,...,...,...
1087,RD_F_J_20230327,1546,3,5,0,0
1088,RD_F_J_20230328,1547,3,5,0,1
1089,RD_F_J_20230329,1548,3,5,0,2
1090,RD_F_J_20230330,1549,3,5,0,3


In [14]:
train_data = train_data.drop(['ID','supply'],axis=1)
test_data = test_data.drop(['ID'],axis=1)

In [15]:
# 설정값
window_size = 28
forcast_size= 28
batch_size = 32

targets = 'price'
date = 'timastamp'

In [16]:
test_data = pd.concat([train_data.loc[(train_data.timestamp > train_data.timestamp.max()-28)], test_data ],axis=0, ignore_index=True).reset_index(drop=True)
test_data                    

Unnamed: 0,timestamp,item,corporation,location,price,weekday
0,1495,4,0,0,1864.0,5
1,1496,4,0,0,0.0,6
2,1497,4,0,0,1837.0,0
3,1498,4,0,0,1595.0,1
4,1499,4,0,0,1747.0,2
...,...,...,...,...,...,...
2179,1546,3,5,0,,0
2180,1547,3,5,0,,1
2181,1548,3,5,0,,2
2182,1549,3,5,0,,3


In [17]:
valid_data = train_data.loc[(train_data.timestamp > train_data.timestamp.max()-56) ]

In [18]:
test_data.shape,valid_data.shape

((2184, 6), (2184, 6))

In [19]:
train_week = train_data[['weekday']].copy()
train_data = train_data.drop('weekday',axis=1)

valid_week = valid_data[['weekday']].copy()
valid_data = valid_data.drop('weekday',axis=1)

test_week = test_data[['weekday']].copy()
test_data = test_data.drop('weekday',axis=1)

In [21]:
def time_slide_df(df, window_size, forcast_size,  target):
    df_ = df.copy()
    data_list = []
    dap_list = []
    date_list = []
    for idx in range(0, df_.shape[0]-window_size-forcast_size):
#         print(idx+window_size)
        x = df_.iloc[idx:idx+window_size, :]
        y = df_.iloc[idx+window_size : idx+window_size+forcast_size, :].values
#         date_ = df_.loc[idx+window_size:idx+window_size+forcast_size-1, date].values
        data_list.append(x)
        dap_list.append(y)
#         date_list.append(date_)
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32')#, np.array(date_list)


class Data():
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]
    
# train_df_fe, test_df_fe, mean_, std_ = standardization(train_set, test_set, 'date_time', targets)
train_x, train_y = time_slide_df(train_data, window_size, forcast_size, targets)
        
train_ds = Data(train_x, train_y)
valid_ds = Data(train_x[-56:], train_y[-56:])

# time_slide_df(valid_data, window_size, forcast_size, targets)
        

In [22]:
def build_test_dataset(time_series):
    dataX = []
    dataX.append(time_series)
    return np.array(dataX)

testX = build_test_dataset(np.array(valid_data))
testX_tensor = torch.FloatTensor(testX)

In [23]:
train_dl = DataLoader(train_ds, batch_size = batch_size, shuffle=True,)
valid_dl = DataLoader(valid_ds, batch_size = 90, shuffle=False)

In [25]:
class moving_avg(torch.nn.Module):
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = torch.nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x

class series_decomp(torch.nn.Module):
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        residual = x - moving_mean
        return moving_mean, residual 
    
class LTSF_DLinear(torch.nn.Module):
    def __init__(self, window_size, forcast_size, kernel_size, individual, feature_size):
        super(LTSF_DLinear, self).__init__()
        self.window_size = window_size
        self.forcast_size = forcast_size
        self.decompsition = series_decomp(kernel_size)
        self.individual = individual
        self.channels = feature_size
        if self.individual:
            self.Linear_Seasonal = torch.nn.ModuleList()
            self.Linear_Trend = torch.nn.ModuleList()
            for i in range(self.channels):
                self.Linear_Trend.append(torch.nn.Linear(self.window_size, self.forcast_size))
                self.Linear_Trend[i].weight = torch.nn.Parameter((1/self.window_size)*torch.ones([self.forcast_size, self.window_size]))
                self.Linear_Seasonal.append(torch.nn.Linear(self.window_size, self.forcast_size))
                self.Linear_Seasonal[i].weight = torch.nn.Parameter((1/self.window_size)*torch.ones([self.forcast_size, self.window_size]))
        else:
            self.Linear_Trend = torch.nn.Linear(self.window_size, self.forcast_size)
            self.Linear_Trend.weight = torch.nn.Parameter((1/self.window_size)*torch.ones([self.forcast_size, self.window_size]))
            self.Linear_Seasonal = torch.nn.Linear(self.window_size,  self.forcast_size)
            self.Linear_Seasonal.weight = torch.nn.Parameter((1/self.window_size)*torch.ones([self.forcast_size, self.window_size]))

    def forward(self, x):
        
        trend_init, seasonal_init = self.decompsition(x)
        trend_init, seasonal_init = trend_init.permute(0,2,1), seasonal_init.permute(0,2,1)
        if self.individual:
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.forcast_size], dtype=trend_init.dtype).to(trend_init.device)
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.forcast_size], dtype=seasonal_init.dtype).to(seasonal_init.device)
            for idx in range(self.channels):
                trend_output[:, idx, :] = self.Linear_Trend[idx](trend_init[:, idx, :])
                seasonal_output[:, idx, :] = self.Linear_Seasonal[idx](seasonal_init[:, idx, :])                
        else:
            trend_output = self.Linear_Trend(trend_init)
            seasonal_output = self.Linear_Seasonal(seasonal_init)
        x = seasonal_output + trend_output
        return x.permute(0,2,1) 

In [26]:
USE_CUDA = torch.cuda.is_available()
print(USE_CUDA)

device = torch.device('cuda:0' if USE_CUDA else 'cpu')
print('학습을 진행하는 기기:',device)

False
학습을 진행하는 기기: cpu


In [27]:
# 모델 학습
DLinear_model = LTSF_DLinear(window_size,
                            forcast_size=forcast_size,
                            kernel_size=27,
                            individual=False,
                            feature_size=1,
                            )

In [28]:
epoch = 5
lr = 0.0001

In [29]:
def model_run(train_data, DLinear_model):
    epoch = 5
    lr = 0.0001

    train_x, train_y = time_slide_df(train_data, window_size, forcast_size, targets)

    train_ds = Data(train_x, train_y)
    valid_ds = Data(train_x[-56:], train_y[-56:])

    testX = build_test_dataset(np.array(valid_data))
    testX_tensor = torch.FloatTensor(testX)

    train_dl = DataLoader(train_ds, batch_size = batch_size, shuffle=True,)
    valid_dl = DataLoader(valid_ds, batch_size = 90, shuffle=False)


    train_loss_list = []
    valid_loss_list = []
    test_loss_list = []

    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
    max_loss = 999999999

    for epoch in tqdm(range(1, epoch+1)):
        loss_list = []
        DLinear_model.train()
        for batch_idx, (data, target) in enumerate(train_dl):
            optimizer.zero_grad()
            output = DLinear_model(data)
    #         print('out',output.shape,target.shape,target.unsqueeze(-1).shape)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            loss_list.append(loss.item())    
        train_loss_list.append(np.mean(loss_list))

        DLinear_model.eval()
        with torch.no_grad():
            for data, target in valid_dl:
                output = DLinear_model(data)    

                valid_loss = criterion(output, target)
                valid_loss_list.append(valid_loss)

    #         for data, target in test_dl:
    #             output = DLinear_model(data)
    #             test_loss = criterion(output, target.unsqueeze(-1))
    #             test_loss_list.append(test_loss)

        if valid_loss < max_loss:
            torch.save(DLinear_model, path+'DLinear_model.pth')
            max_loss = valid_loss
            print("train_loss={:.3f}, valid_los{:.3f}, Model Save".format(loss, valid_loss))
            dlinear_best_epoch = epoch
            dlinear_best_train_loss = np.mean(loss_list)
            dlinear_best_valid_loss = np.mean(valid_loss.item())
    #         dlinear_best_test_loss = np.mean(test_loss.item())

        print("epoch = {}, train_loss : {:.3f}, valid_loss : {:.3f}".format(epoch, np.mean(loss_list), valid_loss))
    return DLinear_model

In [30]:
train_data = train_data.fillna(0)

In [31]:
model_l = []
base = valid_data.timestamp.min()

model_ = model_run(train_data, DLinear_model)

 20%|████████████████▊                                                                   | 1/5 [00:02<00:10,  2.71s/it]

train_loss=432804.844, valid_los8349.799, Model Save
epoch = 1, train_loss : 339793.798, valid_loss : 8349.799



 40%|█████████████████████████████████▌                                                  | 2/5 [00:05<00:08,  2.77s/it]

train_loss=387567.781, valid_los8013.190, Model Save
epoch = 2, train_loss : 292294.691, valid_loss : 8013.190



 60%|██████████████████████████████████████████████████▍                                 | 3/5 [00:08<00:05,  2.72s/it]

epoch = 3, train_loss : 282701.820, valid_loss : 8210.526



 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:10<00:02,  2.69s/it]

epoch = 4, train_loss : 280004.780, valid_loss : 9319.320


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:13<00:00,  2.73s/it]

epoch = 5, train_loss : 278888.951, valid_loss : 8090.457





In [32]:
model_l = []
base = valid_data.timestamp.min()

for i in train_data.item.unique():
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():
            print(i,j,k)
        #     train = train.dropna()
            train = train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k)]
            if train.shape[0] == 0:
                model = 'noExist'
                model_l.append(model)                
            else:
                model = model_run(train, model_)
                model_l.append(model)

4 0 0


 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 13.51it/s]

train_loss=1155692.250, valid_los133198.641, Model Save
epoch = 1, train_loss : 1105325.535, valid_loss : 133198.641
train_loss=859087.562, valid_los132256.484, Model Save
epoch = 2, train_loss : 1102402.355, valid_loss : 132256.484
train_loss=1179777.500, valid_los132109.203, Model Save
epoch = 3, train_loss : 1102129.879, valid_loss : 132109.203


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 12.92it/s]

epoch = 4, train_loss : 1099801.497, valid_loss : 132232.109
train_loss=787124.938, valid_los131876.266, Model Save
epoch = 5, train_loss : 1098492.098, valid_loss : 131876.266
4 0 1



 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.77it/s]

train_loss=444665.562, valid_los135005.172, Model Save
epoch = 1, train_loss : 402389.200, valid_loss : 135005.172
train_loss=334926.906, valid_los133340.891, Model Save
epoch = 2, train_loss : 397116.947, valid_loss : 133340.891
train_loss=441092.844, valid_los131452.812, Model Save
epoch = 3, train_loss : 394175.400, valid_loss : 131452.812
train_loss=244179.234, valid_los131216.562, Model Save
epoch = 4, train_loss : 390795.017, valid_loss : 131216.562


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.28it/s]


train_loss=268463.406, valid_los130809.555, Model Save
epoch = 5, train_loss : 388443.468, valid_loss : 130809.555
4 1 0


 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 13.60it/s]

train_loss=1541166.500, valid_los221666.828, Model Save
epoch = 1, train_loss : 1089314.652, valid_loss : 221666.828
epoch = 2, train_loss : 1087238.538, valid_loss : 223104.750
epoch = 3, train_loss : 1084418.951, valid_loss : 223597.484


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.40it/s]

epoch = 4, train_loss : 1084091.321, valid_loss : 223869.422
epoch = 5, train_loss : 1083318.793, valid_loss : 223992.891
4 1 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.28it/s]

train_loss=458925.719, valid_los157745.594, Model Save
epoch = 1, train_loss : 448829.535, valid_loss : 157745.594
train_loss=442807.562, valid_los157112.062, Model Save
epoch = 2, train_loss : 447521.128, valid_loss : 157112.062
train_loss=332661.406, valid_los156644.141, Model Save
epoch = 3, train_loss : 446638.824, valid_loss : 156644.141


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.92it/s]

epoch = 4, train_loss : 446695.284, valid_loss : 156715.109
epoch = 5, train_loss : 446481.643, valid_loss : 156679.766
4 2 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.28it/s]

train_loss=953838.438, valid_los331728.938, Model Save
epoch = 1, train_loss : 1287107.069, valid_loss : 331728.938
epoch = 2, train_loss : 1290018.390, valid_loss : 334097.469
epoch = 3, train_loss : 1285069.959, valid_loss : 333278.781


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.62it/s]

epoch = 4, train_loss : 1285686.837, valid_loss : 334607.156
epoch = 5, train_loss : 1284142.004, valid_loss : 334446.250
4 2 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 12.82it/s]

train_loss=398596.031, valid_los174585.797, Model Save
epoch = 1, train_loss : 535440.942, valid_loss : 174585.797
train_loss=578809.250, valid_los173908.516, Model Save
epoch = 2, train_loss : 535065.630, valid_loss : 173908.516
epoch = 3, train_loss : 534594.110, valid_loss : 174839.547


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 12.34it/s]

train_loss=483312.875, valid_los173583.312, Model Save
epoch = 4, train_loss : 533769.071, valid_loss : 173583.312
train_loss=648178.000, valid_los172624.000, Model Save
epoch = 5, train_loss : 533854.440, valid_loss : 172624.000
4 3 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.08it/s]

train_loss=955695.125, valid_los539836.188, Model Save
epoch = 1, train_loss : 1016192.863, valid_loss : 539836.188
epoch = 2, train_loss : 1012061.196, valid_loss : 540319.438
epoch = 3, train_loss : 1012802.034, valid_loss : 540198.312


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.92it/s]

epoch = 4, train_loss : 1009521.029, valid_loss : 540306.688
epoch = 5, train_loss : 1009637.947, valid_loss : 540300.625
4 3 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 13.79it/s]

train_loss=565598.875, valid_los218357.328, Model Save
epoch = 1, train_loss : 700821.207, valid_loss : 218357.328
train_loss=532997.625, valid_los217565.484, Model Save
epoch = 2, train_loss : 698314.570, valid_loss : 217565.484
train_loss=628800.312, valid_los216828.109, Model Save
epoch = 3, train_loss : 698341.095, valid_loss : 216828.109


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.08it/s]

epoch = 4, train_loss : 698594.377, valid_loss : 218018.219
train_loss=593770.250, valid_los216399.938, Model Save
epoch = 5, train_loss : 697437.889, valid_loss : 216399.938
4 4 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 13.15it/s]

train_loss=1097229.625, valid_los159867.469, Model Save
epoch = 1, train_loss : 1187318.997, valid_loss : 159867.469
epoch = 2, train_loss : 1179470.898, valid_loss : 160995.281
epoch = 3, train_loss : 1177599.416, valid_loss : 162314.906


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.58it/s]

epoch = 4, train_loss : 1173563.321, valid_loss : 162419.422
epoch = 5, train_loss : 1171776.696, valid_loss : 162989.953
4 4 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 15.26it/s]

train_loss=503982.062, valid_los170401.047, Model Save
epoch = 1, train_loss : 582240.418, valid_loss : 170401.047
train_loss=676620.312, valid_los167050.125, Model Save
epoch = 2, train_loss : 578739.448, valid_loss : 167050.125
train_loss=653570.188, valid_los165895.703, Model Save
epoch = 3, train_loss : 577726.031, valid_loss : 165895.703


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.62it/s]

train_loss=318890.812, valid_los165860.922, Model Save
epoch = 4, train_loss : 575921.939, valid_loss : 165860.922
train_loss=478502.000, valid_los164986.406, Model Save
epoch = 5, train_loss : 575912.488, valid_loss : 164986.406
4 5 0
4 5 1
2 0 0



 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.31it/s]

train_loss=70820.125, valid_los111603.867, Model Save
epoch = 1, train_loss : 53058.792, valid_loss : 111603.867
epoch = 2, train_loss : 52599.427, valid_loss : 112125.352
epoch = 3, train_loss : 52258.716, valid_loss : 112261.617
epoch = 4, train_loss : 52128.875, valid_loss : 112533.094


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.85it/s]


epoch = 5, train_loss : 52053.471, valid_loss : 112478.727
2 0 1
2 1 0


 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.49it/s]

train_loss=19200.152, valid_los272256.844, Model Save
epoch = 1, train_loss : 36917.004, valid_loss : 272256.844
train_loss=26315.098, valid_los270695.781, Model Save
epoch = 2, train_loss : 36788.338, valid_loss : 270695.781
train_loss=21922.553, valid_los269061.812, Model Save
epoch = 3, train_loss : 36637.097, valid_loss : 269061.812


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.70it/s]

train_loss=19641.047, valid_los267688.875, Model Save
epoch = 4, train_loss : 36501.953, valid_loss : 267688.875
train_loss=16919.789, valid_los266288.031, Model Save
epoch = 5, train_loss : 36373.836, valid_loss : 266288.031
2 1 1
2 2 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 15.74it/s]

train_loss=49392.781, valid_los233534.609, Model Save
epoch = 1, train_loss : 46054.710, valid_loss : 233534.609
train_loss=26227.975, valid_los233326.547, Model Save
epoch = 2, train_loss : 45864.060, valid_loss : 233326.547
train_loss=49124.676, valid_los232803.125, Model Save
epoch = 3, train_loss : 45857.460, valid_loss : 232803.125


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.49it/s]

train_loss=55656.688, valid_los232794.109, Model Save
epoch = 4, train_loss : 45799.226, valid_loss : 232794.109
train_loss=50962.164, valid_los232541.984, Model Save
epoch = 5, train_loss : 45714.130, valid_loss : 232541.984
2 2 1
2 3 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.92it/s]

train_loss=79902.305, valid_los166380.016, Model Save
epoch = 1, train_loss : 47647.637, valid_loss : 166380.016
train_loss=50540.109, valid_los166378.609, Model Save
epoch = 2, train_loss : 47424.571, valid_loss : 166378.609
train_loss=17289.070, valid_los165791.875, Model Save
epoch = 3, train_loss : 47228.014, valid_loss : 165791.875


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.88it/s]

train_loss=34722.992, valid_los165504.000, Model Save
epoch = 4, train_loss : 47224.478, valid_loss : 165504.000
train_loss=83437.383, valid_los165090.016, Model Save
epoch = 5, train_loss : 47335.418, valid_loss : 165090.016
2 3 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.70it/s]

train_loss=3288.758, valid_los14145.589, Model Save
epoch = 1, train_loss : 1628.163, valid_loss : 14145.589
train_loss=1420.151, valid_los14050.402, Model Save
epoch = 2, train_loss : 1512.329, valid_loss : 14050.402
train_loss=186.379, valid_los13988.173, Model Save
epoch = 3, train_loss : 1502.500, valid_loss : 13988.173


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.24it/s]

train_loss=338.571, valid_los13919.657, Model Save
epoch = 4, train_loss : 1498.495, valid_loss : 13919.657
train_loss=25.788, valid_los13851.610, Model Save
epoch = 5, train_loss : 1492.792, valid_loss : 13851.610
2 4 0



 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.99it/s]

train_loss=56831.207, valid_los103920.031, Model Save
epoch = 1, train_loss : 40148.204, valid_loss : 103920.031
epoch = 2, train_loss : 39915.535, valid_loss : 104664.148
epoch = 3, train_loss : 39880.856, valid_loss : 104132.859
train_loss=23453.867, valid_los103682.930, Model Save
epoch = 4, train_loss : 39730.163, valid_loss : 103682.930


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.97it/s]


epoch = 5, train_loss : 39743.671, valid_loss : 103943.484
2 4 1


 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.39it/s]

train_loss=6391.951, valid_los23.647, Model Save
epoch = 1, train_loss : 4131.752, valid_loss : 23.647
train_loss=5586.642, valid_los9.287, Model Save
epoch = 2, train_loss : 4051.921, valid_loss : 9.287
epoch = 3, train_loss : 4028.251, valid_loss : 9.420


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.36it/s]

train_loss=4320.768, valid_los6.277, Model Save
epoch = 4, train_loss : 4023.318, valid_loss : 6.277
epoch = 5, train_loss : 4002.862, valid_loss : 9.558
2 5 0
2 5 1
1 0 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.92it/s]

train_loss=40975.566, valid_los142255.000, Model Save
epoch = 1, train_loss : 26283.550, valid_loss : 142255.000
train_loss=37128.441, valid_los142143.516, Model Save
epoch = 2, train_loss : 26217.440, valid_loss : 142143.516
train_loss=29761.652, valid_los141931.188, Model Save
epoch = 3, train_loss : 26153.774, valid_loss : 141931.188
train_loss=20356.354, valid_los141470.250, Model Save


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.24it/s]

epoch = 4, train_loss : 26084.955, valid_loss : 141470.250
train_loss=34777.559, valid_los141412.203, Model Save
epoch = 5, train_loss : 26096.559, valid_loss : 141412.203
1 0 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 13.89it/s]

train_loss=148.378, valid_los218.711, Model Save
epoch = 1, train_loss : 598.093, valid_loss : 218.711
epoch = 2, train_loss : 592.722, valid_loss : 229.262
train_loss=985.809, valid_los213.454, Model Save
epoch = 3, train_loss : 592.632, valid_loss : 213.454


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.70it/s]

epoch = 4, train_loss : 587.575, valid_loss : 215.951
epoch = 5, train_loss : 587.146, valid_loss : 216.632
1 1 0
1 1 1
1 2 0
1 2 1
1 3 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 15.62it/s]

train_loss=16906.211, valid_los16403.895, Model Save
epoch = 1, train_loss : 11625.564, valid_loss : 16403.895
epoch = 2, train_loss : 11607.325, valid_loss : 16443.168
epoch = 3, train_loss : 11556.687, valid_loss : 16420.236


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.88it/s]

epoch = 4, train_loss : 11539.620, valid_loss : 16426.967
epoch = 5, train_loss : 11533.746, valid_loss : 16436.129
1 3 1
1 4 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.92it/s]

train_loss=16454.271, valid_los39385.785, Model Save
epoch = 1, train_loss : 19901.131, valid_loss : 39385.785
train_loss=27524.783, valid_los39346.719, Model Save
epoch = 2, train_loss : 19909.937, valid_loss : 39346.719
train_loss=16292.747, valid_los39320.391, Model Save
epoch = 3, train_loss : 19858.334, valid_loss : 39320.391


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.16it/s]

train_loss=13168.051, valid_los39311.293, Model Save
epoch = 4, train_loss : 19838.240, valid_loss : 39311.293
train_loss=9189.848, valid_los39293.395, Model Save
epoch = 5, train_loss : 19814.047, valid_loss : 39293.395
1 4 1
1 5 0



 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.50it/s]

train_loss=19120.914, valid_los17884.902, Model Save
epoch = 1, train_loss : 15884.066, valid_loss : 17884.902
train_loss=14616.114, valid_los17843.236, Model Save
epoch = 2, train_loss : 15818.190, valid_loss : 17843.236
train_loss=15956.265, valid_los17832.559, Model Save
epoch = 3, train_loss : 15786.671, valid_loss : 17832.559
train_loss=10660.718, valid_los17689.969, Model Save
epoch = 4, train_loss : 15726.706, valid_loss : 17689.969


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.92it/s]


train_loss=34662.867, valid_los17664.457, Model Save
epoch = 5, train_loss : 15773.079, valid_loss : 17664.457
1 5 1
3 0 0


 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.56it/s]

train_loss=36828.266, valid_los58365.832, Model Save
epoch = 1, train_loss : 43869.247, valid_loss : 58365.832
epoch = 2, train_loss : 43695.957, valid_loss : 58504.434
epoch = 3, train_loss : 43423.865, valid_loss : 58687.926
epoch = 4, train_loss : 43250.081, valid_loss : 58868.117


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.45it/s]


epoch = 5, train_loss : 43323.138, valid_loss : 59089.539
3 0 1


 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.82it/s]

train_loss=11676.262, valid_los8683.945, Model Save
epoch = 1, train_loss : 9605.536, valid_loss : 8683.945
train_loss=2917.766, valid_los8644.270, Model Save
epoch = 2, train_loss : 9554.256, valid_loss : 8644.270
train_loss=7184.030, valid_los8641.880, Model Save
epoch = 3, train_loss : 9556.442, valid_loss : 8641.880
train_loss=12404.974, valid_los8638.094, Model Save
epoch = 4, train_loss : 9561.772, valid_loss : 8638.094


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.75it/s]


epoch = 5, train_loss : 9525.097, valid_loss : 8644.697
3 1 0
3 1 1
3 2 0
3 2 1


 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.97it/s]

train_loss=362.966, valid_los4162.368, Model Save
epoch = 1, train_loss : 577.611, valid_loss : 4162.368
train_loss=410.006, valid_los4152.822, Model Save
epoch = 2, train_loss : 568.471, valid_loss : 4152.822
train_loss=670.090, valid_los4142.586, Model Save
epoch = 3, train_loss : 567.848, valid_loss : 4142.586
train_loss=357.528, valid_los4129.222, Model Save
epoch = 4, train_loss : 565.393, valid_loss : 4129.222


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.75it/s]


train_loss=882.582, valid_los4118.514, Model Save
epoch = 5, train_loss : 565.728, valid_loss : 4118.514
3 3 0


 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.39it/s]

train_loss=9293.326, valid_los14931.047, Model Save
epoch = 1, train_loss : 8799.200, valid_loss : 14931.047
train_loss=17165.463, valid_los14905.332, Model Save
epoch = 2, train_loss : 8813.654, valid_loss : 14905.332
train_loss=5396.739, valid_los14886.280, Model Save
epoch = 3, train_loss : 8768.677, valid_loss : 14886.280


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.16it/s]

train_loss=9709.245, valid_los14862.321, Model Save
epoch = 4, train_loss : 8776.351, valid_loss : 14862.321
train_loss=15839.375, valid_los14845.794, Model Save
epoch = 5, train_loss : 8793.237, valid_loss : 14845.794
3 3 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.60it/s]

train_loss=4903.464, valid_los11623.212, Model Save
epoch = 1, train_loss : 9221.150, valid_loss : 11623.212
train_loss=7682.928, valid_los11614.394, Model Save
epoch = 2, train_loss : 9223.826, valid_loss : 11614.394
train_loss=10582.117, valid_los11594.283, Model Save
epoch = 3, train_loss : 9228.696, valid_loss : 11594.283


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.16it/s]

epoch = 4, train_loss : 9203.523, valid_loss : 11618.318
epoch = 5, train_loss : 9215.180, valid_loss : 11604.827
3 4 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.81it/s]

train_loss=9275.710, valid_los23863.354, Model Save
epoch = 1, train_loss : 9454.163, valid_loss : 23863.354
train_loss=6229.116, valid_los23845.271, Model Save
epoch = 2, train_loss : 9435.309, valid_loss : 23845.271
train_loss=8376.561, valid_los23784.656, Model Save
epoch = 3, train_loss : 9437.113, valid_loss : 23784.656


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.45it/s]

epoch = 4, train_loss : 9422.433, valid_loss : 23847.518
epoch = 5, train_loss : 9430.023, valid_loss : 23807.947
3 4 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 13.70it/s]

train_loss=11007.428, valid_los24493.086, Model Save
epoch = 1, train_loss : 8836.068, valid_loss : 24493.086
train_loss=10707.569, valid_los24460.723, Model Save
epoch = 2, train_loss : 8823.727, valid_loss : 24460.723
epoch = 3, train_loss : 8842.337, valid_loss : 24462.598


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.92it/s]

train_loss=8214.097, valid_los24440.516, Model Save
epoch = 4, train_loss : 8798.121, valid_loss : 24440.516
epoch = 5, train_loss : 8815.696, valid_loss : 24459.707
3 5 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 15.03it/s]

train_loss=6638.525, valid_los5857.738, Model Save
epoch = 1, train_loss : 11348.546, valid_loss : 5857.738
train_loss=11534.460, valid_los5856.166, Model Save
epoch = 2, train_loss : 11335.419, valid_loss : 5856.166
train_loss=16023.409, valid_los5849.297, Model Save
epoch = 3, train_loss : 11328.990, valid_loss : 5849.297


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.28it/s]

epoch = 4, train_loss : 11306.447, valid_loss : 5898.806
epoch = 5, train_loss : 11286.135, valid_loss : 5898.999
3 5 1
0 0 0



 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 15.09it/s]

train_loss=274924.125, valid_los156558.203, Model Save
epoch = 1, train_loss : 331465.495, valid_loss : 156558.203
epoch = 2, train_loss : 323921.672, valid_loss : 165722.797
epoch = 3, train_loss : 321318.018, valid_loss : 168634.609
epoch = 4, train_loss : 320513.471, valid_loss : 171027.906


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.75it/s]


epoch = 5, train_loss : 320181.765, valid_loss : 172761.438
0 0 1


 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 16.39it/s]

train_loss=434797.719, valid_los272741.625, Model Save
epoch = 1, train_loss : 325421.423, valid_loss : 272741.625
train_loss=521272.875, valid_los272062.531, Model Save
epoch = 2, train_loss : 325297.824, valid_loss : 272062.531
train_loss=289351.812, valid_los271930.000, Model Save
epoch = 3, train_loss : 324319.218, valid_loss : 271930.000


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.62it/s]

epoch = 4, train_loss : 324340.557, valid_loss : 272259.781
train_loss=313189.250, valid_los271719.781, Model Save
epoch = 5, train_loss : 323924.357, valid_loss : 271719.781
0 1 0



 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.77it/s]

train_loss=190214.219, valid_los174289.812, Model Save
epoch = 1, train_loss : 189499.130, valid_loss : 174289.812
train_loss=82468.953, valid_los171694.562, Model Save
epoch = 2, train_loss : 188157.675, valid_loss : 171694.562
train_loss=292751.156, valid_los170445.250, Model Save
epoch = 3, train_loss : 188612.934, valid_loss : 170445.250
epoch = 4, train_loss : 187884.962, valid_loss : 170470.141


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.41it/s]


train_loss=313420.438, valid_los170387.531, Model Save
epoch = 5, train_loss : 188210.735, valid_loss : 170387.531
0 1 1


 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.49it/s]

train_loss=66656.164, valid_los19300.771, Model Save
epoch = 1, train_loss : 44392.612, valid_loss : 19300.771
train_loss=19713.406, valid_los19153.299, Model Save
epoch = 2, train_loss : 43946.907, valid_loss : 19153.299
train_loss=43737.059, valid_los19071.617, Model Save
epoch = 3, train_loss : 43921.046, valid_loss : 19071.617


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 14.53it/s]

epoch = 4, train_loss : 43780.151, valid_loss : 19112.686
epoch = 5, train_loss : 43625.864, valid_loss : 19078.816
0 2 0



 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:00<00:00, 14.31it/s]

train_loss=229875.172, valid_los242377.141, Model Save
epoch = 1, train_loss : 220502.916, valid_loss : 242377.141
epoch = 2, train_loss : 218238.055, valid_loss : 247421.547
epoch = 3, train_loss : 217831.152, valid_loss : 249466.812
epoch = 4, train_loss : 217373.749, valid_loss : 250447.859


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.89it/s]


epoch = 5, train_loss : 217244.634, valid_loss : 251124.766
0 2 1


 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 12.42it/s]

train_loss=34744.004, valid_los33819.609, Model Save
epoch = 1, train_loss : 28074.260, valid_loss : 33819.609
train_loss=3318.711, valid_los33282.711, Model Save
epoch = 2, train_loss : 27252.574, valid_loss : 33282.711
train_loss=39097.375, valid_los33217.406, Model Save
epoch = 3, train_loss : 27240.073, valid_loss : 33217.406


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 12.98it/s]

train_loss=23465.564, valid_los33147.949, Model Save
epoch = 4, train_loss : 27080.972, valid_loss : 33147.949
train_loss=60031.629, valid_los33074.770, Model Save
epoch = 5, train_loss : 27098.641, valid_loss : 33074.770
0 3 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 14.60it/s]

train_loss=311600.219, valid_los167473.531, Model Save
epoch = 1, train_loss : 284805.125, valid_loss : 167473.531
epoch = 2, train_loss : 279097.526, valid_loss : 173991.250
epoch = 3, train_loss : 277437.286, valid_loss : 177696.578


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 13.85it/s]

epoch = 4, train_loss : 276637.667, valid_loss : 178110.250
epoch = 5, train_loss : 276405.851, valid_loss : 179546.828
0 3 1
0 4 0



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 12.58it/s]

train_loss=236388.125, valid_los197492.766, Model Save
epoch = 1, train_loss : 322552.302, valid_loss : 197492.766
epoch = 2, train_loss : 322146.548, valid_loss : 197908.984
epoch = 3, train_loss : 322190.304, valid_loss : 197759.156


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 11.26it/s]

train_loss=270918.344, valid_los196103.156, Model Save
epoch = 4, train_loss : 321100.380, valid_loss : 196103.156
epoch = 5, train_loss : 320554.003, valid_loss : 196666.031
0 4 1



 40%|█████████████████████████████████▌                                                  | 2/5 [00:00<00:00, 12.58it/s]

train_loss=498862.125, valid_los290859.656, Model Save
epoch = 1, train_loss : 280895.307, valid_loss : 290859.656
train_loss=201215.172, valid_los289932.344, Model Save
epoch = 2, train_loss : 279437.512, valid_loss : 289932.344
train_loss=302939.438, valid_los289073.969, Model Save
epoch = 3, train_loss : 279683.940, valid_loss : 289073.969


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 12.13it/s]

epoch = 4, train_loss : 279089.679, valid_loss : 289330.281
train_loss=247366.719, valid_los288672.875, Model Save
epoch = 5, train_loss : 279267.221, valid_loss : 288672.875
0 5 0
0 5 1





In [33]:
def RMSE(y, y_pred):
    return mean_squared_error(y, y_pred)**0.5

In [34]:
rmse = []
m=0
for i in train_data.item.unique():
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():
            DLinear_model = model_l[m]
            if DLinear_model=='noExist':
                print(i,j,k)
            else:
                print(i,j,k)
                testX = build_test_dataset(np.array(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)][['price']].values[:28]))
                testX_tensor = torch.FloatTensor(testX)
                print(testX_tensor.shape)
                if testX_tensor.shape[1]==0:
                    break
                # 예측 테스트
                with torch.no_grad(): 
                    pred = []
                    for pr in range(len(testX_tensor)):
                        predicted = DLinear_model(torch.unsqueeze(testX_tensor[pr], 0))
                        predicted = torch.flatten(predicted)
                        pred.append(predicted)

                preds = pred[0].cpu().numpy().flatten()

                vmax = valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'timestamp'].max()            
                valid_data.loc[(valid_data.timestamp>vmax-28) & (valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = preds

            m=m+1

4 0 0
torch.Size([1, 28, 1])
4 0 1
torch.Size([1, 28, 1])
4 1 0
torch.Size([1, 28, 1])
4 1 1
torch.Size([1, 28, 1])
4 2 0
torch.Size([1, 28, 1])
4 2 1
torch.Size([1, 28, 1])
4 3 0
torch.Size([1, 28, 1])
4 3 1
torch.Size([1, 28, 1])
4 4 0
torch.Size([1, 28, 1])
4 4 1
torch.Size([1, 28, 1])
4 5 0
4 5 1
2 0 0
torch.Size([1, 28, 1])
2 0 1
2 1 0
torch.Size([1, 28, 1])
2 1 1
2 2 0
torch.Size([1, 28, 1])
2 2 1
2 3 0
torch.Size([1, 28, 1])
2 3 1
torch.Size([1, 28, 1])
2 4 0
torch.Size([1, 28, 1])
2 4 1
torch.Size([1, 28, 1])
2 5 0
2 5 1
1 0 0
torch.Size([1, 28, 1])
1 0 1
torch.Size([1, 28, 1])
1 1 0
1 1 1
1 2 0
1 2 1
1 3 0
torch.Size([1, 28, 1])
1 3 1
1 4 0
torch.Size([1, 28, 1])
1 4 1
1 5 0
torch.Size([1, 28, 1])
1 5 1
3 0 0
torch.Size([1, 28, 1])
3 0 1
torch.Size([1, 28, 1])
3 1 0
3 1 1
3 2 0
3 2 1
torch.Size([1, 28, 1])
3 3 0
torch.Size([1, 28, 1])
3 3 1
torch.Size([1, 28, 1])
3 4 0
torch.Size([1, 28, 1])
3 4 1
torch.Size([1, 28, 1])
3 5 0
torch.Size([1, 28, 1])
3 5 1
0 0 0
torch.Size([1, 2

In [35]:
valid_data = pd.concat([valid_data, valid_week], axis=1)     

In [36]:
for i in train_data.item.unique():
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():          
            if len(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values) ==0:
                break
                     
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-56:-28]) == 0:
                valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = 0        
                
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-56:-28]== 0)>20:
                valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = 0                    
    
valid_data.loc[valid_data.weekday==6, 'pred'] = 0    
valid_data.loc[valid_data.pred < 0,'pred'] = 0
valid_data = valid_data.fillna(0)

In [None]:
for i in train_data.item.unique():
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():          
            if len(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values) ==0:
                break
            
            print('MAE SCORE : ', RMSE(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values, valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].pred.values))
            rmse.append(RMSE(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values, (valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].pred.values)))

            fig = plt.figure(figsize=(8,3))
            plt.plot( valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values, label = 'price')
            plt.plot( valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].pred.values, label = 'pred')
            plt.show()

In [38]:
base = valid_data.timestamp.min()

eval_data = valid_data.loc[valid_data.timestamp>=base+28].copy()
print(np.sqrt(mean_squared_error(eval_data.price, eval_data.pred)))

public_data = eval_data.loc[eval_data.timestamp < base+ 28+14]
print(np.sqrt(mean_squared_error(public_data.price, public_data.pred)))

858.5948288725721
626.5670064123481


## test

In [39]:
rmse = []
m=0
for i in train_data.item.unique():
    DLinear_model = model_l[m]
    m=m+1
    if DLinear_model=='noExist':
        print(i,j,k)
    else:
        for j in train_data.corporation.unique():
            for k in train_data.location.unique():

                    print(i,j,k)
                    testX = build_test_dataset(np.array(test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k)][['price']].values[:28]))
                    testX_tensor = torch.FloatTensor(testX)
                    print(testX_tensor.shape)
                    if testX_tensor.shape[1]==0:
                        break
                    # 예측 테스트
                    with torch.no_grad(): 
                        pred = []
                        for pr in range(len(testX_tensor)):
                            predicted = DLinear_model(torch.unsqueeze(testX_tensor[pr], 0))
                            predicted = torch.flatten(predicted)
                            pred.append(predicted)

                    preds = pred[0].cpu().numpy().flatten()
        #             np.concatenate(np.zeros(28), preds)

                    vmax = test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'timestamp'].max()            
                    test_data.loc[(test_data.timestamp>vmax-28) & (test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = preds


4 0 0
torch.Size([1, 28, 1])
4 0 1
torch.Size([1, 28, 1])
4 1 0
torch.Size([1, 28, 1])
4 1 1
torch.Size([1, 28, 1])
4 2 0
torch.Size([1, 28, 1])
4 2 1
torch.Size([1, 28, 1])
4 3 0
torch.Size([1, 28, 1])
4 3 1
torch.Size([1, 28, 1])
4 4 0
torch.Size([1, 28, 1])
4 4 1
torch.Size([1, 28, 1])
4 5 0
torch.Size([1, 0, 1])
2 0 0
torch.Size([1, 28, 1])
2 0 1
torch.Size([1, 0, 1])
2 1 0
torch.Size([1, 28, 1])
2 1 1
torch.Size([1, 0, 1])
2 2 0
torch.Size([1, 28, 1])
2 2 1
torch.Size([1, 0, 1])
2 3 0
torch.Size([1, 28, 1])
2 3 1
torch.Size([1, 28, 1])
2 4 0
torch.Size([1, 28, 1])
2 4 1
torch.Size([1, 28, 1])
2 5 0
torch.Size([1, 0, 1])
1 0 0
torch.Size([1, 28, 1])
1 0 1
torch.Size([1, 28, 1])
1 1 0
torch.Size([1, 0, 1])
1 2 0
torch.Size([1, 0, 1])
1 3 0
torch.Size([1, 28, 1])
1 3 1
torch.Size([1, 0, 1])
1 4 0
torch.Size([1, 28, 1])
1 4 1
torch.Size([1, 0, 1])
1 5 0
torch.Size([1, 28, 1])
1 5 1
torch.Size([1, 0, 1])
3 0 0
torch.Size([1, 28, 1])
3 0 1
torch.Size([1, 28, 1])
3 1 0
torch.Size([1, 0, 

In [40]:
test_data = pd.concat([test_data, test_week], axis=1)     
for i in train_data.item.unique():
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():          
            if len(test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k)].price.values) ==0:
                break

            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:]) == 0:
                test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = 0       
                
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:] == 0) > 23:
                test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = 0                  
          
test_data.loc[test_data.weekday==6, 'pred'] = 0    
test_data.loc[test_data.pred < 0,'pred'] = 0

In [41]:
test_data = test_data.fillna(0)

In [42]:
for i in train_data.item.unique():
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():          
            if len(test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k)].price.values) ==0:
                break
            
            rmse.append(RMSE(test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k)].price.values, (test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k)].pred.values)))


test_data = test_data.fillna(0)

In [None]:
for i in test_data.item.unique():
    for j in test_data.corporation.unique():
        for k in test_data.location.unique():
            print('item : ',i,'corpolation : ',j,'location : ', k)
            plt.figure(figsize=(10,3))
            plt.plot(valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].timestamp,
                     valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].price)
            plt.plot(test_data.loc[(test_data.item==i) &
                (test_data.corporation==j) & (test_data.location==k)].timestamp,
                     test_data.loc[(test_data.item==i) &
                (test_data.corporation==j) & (test_data.location==k)].pred)
            plt.show()

In [44]:
base = test_data.timestamp.min()
test_data = test_data.loc[test_data.timestamp>=base+28]

In [45]:
test_data

Unnamed: 0,timestamp,item,corporation,location,price,pred,weekday
1092,1523,4,0,0,0.0,3205.659180,5
1093,1524,4,0,0,0.0,0.000000,6
1094,1525,4,0,0,0.0,2918.623535,0
1095,1526,4,0,0,0.0,3185.242188,1
1096,1527,4,0,0,0.0,2965.739746,2
...,...,...,...,...,...,...,...
2179,1546,3,5,0,0.0,430.991364,0
2180,1547,3,5,0,0.0,429.288422,1
2181,1548,3,5,0,0.0,431.629730,2
2182,1549,3,5,0,0.0,412.613281,3


In [46]:
submission = pd.read_csv(path+'sample_submission.csv')
submission['answer'] = test_data.pred.values
submission.to_csv(path+'submit_dl.csv',index=False)

# 머신러닝 모델

In [47]:
train_data = pd.read_csv(path+'train.csv')
train_data = train_data.rename(columns={'supply(kg)':'supply', 'price(원/kg)':'price'})
train_data

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0
...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,2023-02-27,RD,F,J,452440.0,468.0
59393,RD_F_J_20230228,2023-02-28,RD,F,J,421980.0,531.0
59394,RD_F_J_20230301,2023-03-01,RD,F,J,382980.0,574.0
59395,RD_F_J_20230302,2023-03-02,RD,F,J,477220.0,523.0


In [48]:
# 선행 기간 데이터를 처리하기 위해 test에 train데이터 통합
test_data = pd.read_csv(path+'test.csv')
test_data = test_data.rename(columns={'supply(kg)':'supply', 'price(원/kg)':'price'})
test_data = pd.concat([train_data, test_data],axis=0,ignore_index=True).reset_index(drop=True)
test_data

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price
0,TG_A_J_20190101,2019-01-01,TG,A,J,0.0,0.0
1,TG_A_J_20190102,2019-01-02,TG,A,J,0.0,0.0
2,TG_A_J_20190103,2019-01-03,TG,A,J,60601.0,1728.0
3,TG_A_J_20190104,2019-01-04,TG,A,J,25000.0,1408.0
4,TG_A_J_20190105,2019-01-05,TG,A,J,32352.0,1250.0
...,...,...,...,...,...,...,...
60484,RD_F_J_20230327,2023-03-27,RD,F,J,,
60485,RD_F_J_20230328,2023-03-28,RD,F,J,,
60486,RD_F_J_20230329,2023-03-29,RD,F,J,,
60487,RD_F_J_20230330,2023-03-30,RD,F,J,,


In [49]:
#학습데이터 생성
trade = pd.read_csv(path+'international_trade.csv')
trade.columns = ['기간','x','c1','c2','c3','c4','c5']
trade = trade[['기간','x','c5']]

trade = pd.pivot_table(trade,
                         index='기간',
                         columns='x',
                         values=['c5'],
                         aggfunc='sum')
trade.columns = [ 'c'+str(i) for i,(x1,x2) in enumerate(trade.columns)]
trade_columns = trade.columns
trade = trade.fillna(0)

train_data['기간'] = train_data.timestamp.apply(lambda x : str(x)[:7])
train_data = pd.merge(train_data, trade, on='기간', how='left')
train_data = train_data.drop('기간',axis=1)

train_data['item'] = train_data['item'].astype('str')
train_data['corporation'] = train_data['corporation'].astype('str')
train_data['location'] = train_data['location'].astype('str')

train_data['year'] = train_data['timestamp'].apply(lambda x: x[:4]).astype('int')
train_data['month'] = train_data['timestamp'].apply(lambda x: x[5:7]).astype('int')
train_data['day'] = train_data['timestamp'].apply(lambda x: x[8:10]).astype('int')

train_data['ts'] = train_data.apply(lambda x : pd.Timestamp(year=x.year, month=x.month, day=x.day),axis=1)
train_data['weekday'] = train_data['ts'].dt.weekday
train_data['holiday'] = train_data['weekday'].apply(lambda x : 1 if x ==6 else 0)
train_data = train_data.drop('ts',axis=1)
kr_holidays = holidays.KR()
train_data['holiday'] = train_data.apply(lambda x : 1 if x.timestamp in kr_holidays else x.holiday, axis=1)

itemEncoder = LabelEncoder()
train_data['item'] = itemEncoder.fit_transform(train_data['item'])
corporationEncoder = LabelEncoder()
train_data['corporation'] = corporationEncoder.fit_transform(train_data['corporation'])
locationEncoder = LabelEncoder()
train_data['location'] = locationEncoder.fit_transform(train_data['location'])

ddict = {}
for i,x in enumerate(train_data['timestamp'].unique()):
  ddict[x] = i
train_data['timestamp'] = train_data['timestamp'].map(ddict)

# item에 무관한 전체 데이터
train_full = train_data.copy()

train_full

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,c0,c1,c2,...,c29,c30,c31,c32,c33,year,month,day,weekday,holiday
0,TG_A_J_20190101,0,4,0,0,0.0,0.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,1,1,1
1,TG_A_J_20190102,1,4,0,0,0.0,0.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,2,2,0
2,TG_A_J_20190103,2,4,0,0,60601.0,1728.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,3,3,0
3,TG_A_J_20190104,3,4,0,0,25000.0,1408.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,4,4,0
4,TG_A_J_20190105,4,4,0,0,32352.0,1250.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,5,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59392,RD_F_J_20230227,1518,3,5,0,452440.0,468.0,-64.0,-867.0,0.0,...,106.0,3.0,0.0,-4890.0,-71.0,2023,2,27,0,0
59393,RD_F_J_20230228,1519,3,5,0,421980.0,531.0,-64.0,-867.0,0.0,...,106.0,3.0,0.0,-4890.0,-71.0,2023,2,28,1,0
59394,RD_F_J_20230301,1520,3,5,0,382980.0,574.0,,,,...,,,,,,2023,3,1,2,1
59395,RD_F_J_20230302,1521,3,5,0,477220.0,523.0,,,,...,,,,,,2023,3,2,3,0


In [50]:
# test 데이터 생성
test_data['기간'] = test_data.timestamp.apply(lambda x : str(x)[:7])
test_data = pd.merge(test_data, trade, on='기간', how='left')
test_data = test_data.drop('기간',axis=1)

test_data['item'] = test_data['item'].astype('str')
test_data['corporation'] = test_data['corporation'].astype('str')
test_data['location'] = test_data['location'].astype('str')

test_data['year'] = test_data['timestamp'].apply(lambda x: x[:4]).astype('int')
test_data['month'] = test_data['timestamp'].apply(lambda x: x[5:7]).astype('int')
test_data['day'] = test_data['timestamp'].apply(lambda x: x[8:10]).astype('int')

test_data['ts'] = test_data.apply(lambda x : pd.Timestamp(year=x.year, month=x.month, day=x.day),axis=1)
test_data['weekday'] = test_data['ts'].dt.weekday
test_data['holiday'] = test_data['weekday'].apply(lambda x : 1 if x ==6 else 0)
test_data = test_data.drop('ts',axis=1)
kr_holidays = holidays.KR()
test_data['holiday'] = test_data.apply(lambda x : 1 if x.timestamp in kr_holidays else x.holiday, axis=1)

test_data['item'] = itemEncoder.transform(test_data['item'])
test_data['corporation'] = corporationEncoder.transform(test_data['corporation'])
test_data['location'] = locationEncoder.transform(test_data['location'])

ddict = {}
for i,x in enumerate(test_data['timestamp'].unique()):
  ddict[x] = i
test_data['timestamp'] = test_data['timestamp'].map(ddict)
# item에 무관한 전체 데이터
test_full = test_data.copy()

test_full

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,c0,c1,c2,...,c29,c30,c31,c32,c33,year,month,day,weekday,holiday
0,TG_A_J_20190101,0,4,0,0,0.0,0.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,1,1,1
1,TG_A_J_20190102,1,4,0,0,0.0,0.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,2,2,0
2,TG_A_J_20190103,2,4,0,0,60601.0,1728.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,3,3,0
3,TG_A_J_20190104,3,4,0,0,25000.0,1408.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,4,4,0
4,TG_A_J_20190105,4,4,0,0,32352.0,1250.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,5,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60484,RD_F_J_20230327,1546,3,5,0,,,,,,...,,,,,,2023,3,27,0,0
60485,RD_F_J_20230328,1547,3,5,0,,,,,,...,,,,,,2023,3,28,1,0
60486,RD_F_J_20230329,1548,3,5,0,,,,,,...,,,,,,2023,3,29,2,0
60487,RD_F_J_20230330,1549,3,5,0,,,,,,...,,,,,,2023,3,30,3,0


In [51]:
def prep_for_train(df, lag, items, VALID=False):
    train = df.copy()

    if 4 in items:
        # 감귤은 무역수지와 무관
        train = train.drop(trade_columns,axis=1)
        
        df1= train_full.groupby(['item','corporation','month','day']).mean().reset_index()[['item','corporation','month','day','price']].rename(columns={'price':'mean_m_price'})
        train = pd.merge(train,df1, on=['item','corporation','month','day'], how='left')
        df1= train_full.groupby(['item','corporation','weekday']).mean().reset_index()[['item','corporation','weekday','price']].rename(columns={'price':'mean_w_price'})
        train = pd.merge(train,df1, on=['item','corporation','weekday'], how='left')
        df1= train_full.groupby(['item','corporation','month','weekday']).mean().reset_index()[['item','corporation','month','weekday','price']].rename(columns={'price':'mean_mw_price'})
        train = pd.merge(train,df1, on=['item','corporation','month','weekday'], how='left')
        df1= train_full.groupby(['item','month','day']).mean().reset_index()[['item','month','day','price']].rename(columns={'price':'mean_im_price'})
        train = pd.merge(train,df1, on=['item','month','day'], how='left')
        df1= train_full.groupby(['item','month','weekday']).mean().reset_index()[['item','month','weekday','price']].rename(columns={'price':'mean_imw_price'})
        train = pd.merge(train,df1, on=['item','month','weekday'], how='left')
        df1= train_full.groupby(['corporation','month','day']).mean().reset_index()[['corporation','month','day','price']].rename(columns={'price':'mean_cm_price'})
        train = pd.merge(train,df1, on=['corporation','month','day'], how='left')
        df1= train_full.groupby(['corporation','month','weekday']).mean().reset_index()[['corporation','month','weekday','price']].rename(columns={'price':'mean_cmw_price'})
        train = pd.merge(train,df1, on=['corporation','month','weekday'], how='left')     
    elif 0 in items:
        # 무역수지 반영
        for col in trade_columns:
            train[col] = train.groupby(['item','corporation','location'])[col].transform(lambda x : x.shift(lag+1) )
            
        
        df1= train_full.groupby(['item','corporation','month','day']).mean().reset_index()[['item','corporation','month','day','price']].rename(columns={'price':'mean_m_price'})
        train = pd.merge(train,df1, on=['item','corporation','month','day'], how='left')
        df1= train_full.groupby(['item','corporation','weekday']).mean().reset_index()[['item','corporation','weekday','price']].rename(columns={'price':'mean_w_price'})
        train = pd.merge(train,df1, on=['item','corporation','weekday'], how='left')
        df1= train_full.groupby(['item','corporation','month','weekday']).mean().reset_index()[['item','corporation','month','weekday','price']].rename(columns={'price':'mean_mw_price'})
        train = pd.merge(train,df1, on=['item','corporation','month','weekday'], how='left')
        df1= train_full.groupby(['item','month','day']).mean().reset_index()[['item','month','day','price']].rename(columns={'price':'mean_im_price'})
        train = pd.merge(train,df1, on=['item','month','day'], how='left')
        df1= train_full.groupby(['item','month','weekday']).mean().reset_index()[['item','month','weekday','price']].rename(columns={'price':'mean_imw_price'})
        train = pd.merge(train,df1, on=['item','month','weekday'], how='left')
        df1= train_full.groupby(['corporation','month','day']).mean().reset_index()[['corporation','month','day','price']].rename(columns={'price':'mean_cm_price'})
        train = pd.merge(train,df1, on=['corporation','month','day'], how='left')
        df1= train_full.groupby(['corporation','month','weekday']).mean().reset_index()[['corporation','month','weekday','price']].rename(columns={'price':'mean_cmw_price'})
        train = pd.merge(train,df1, on=['corporation','month','weekday'], how='left')             
    else:
        # 무역수지 반영
        for col in trade_columns:
            train[col] = train.groupby(['item','corporation','location'])[col].transform(lambda x : x.shift(lag+1) )
            
        df1= train_full.groupby(['item','corporation','location','month','day']).mean().reset_index()[['item','corporation','location','month','day','price']].rename(columns={'price':'mean_m_price'})
        train = pd.merge(train,df1, on=['item','corporation','location','month','day'], how='left')
        df1= train_full.groupby(['item','corporation','location','weekday']).mean().reset_index()[['item','corporation','location','weekday','price']].rename(columns={'price':'mean_w_price'})
        train = pd.merge(train,df1, on=['item','corporation','location','weekday'], how='left')
        df1= train_full.groupby(['item','corporation','location','month','weekday']).mean().reset_index()[['item','corporation','location','month','weekday','price']].rename(columns={'price':'mean_mw_price'})
        train = pd.merge(train,df1, on=['item','corporation','location','month','weekday'], how='left')
        df1= train_full.groupby(['item','month','day']).mean().reset_index()[['item','month','day','price']].rename(columns={'price':'mean_im_price'})
        train = pd.merge(train,df1, on=['item','month','day'], how='left')
        df1= train_full.groupby(['item','month','weekday']).mean().reset_index()[['item','month','weekday','price']].rename(columns={'price':'mean_imw_price'})
        train = pd.merge(train,df1, on=['item','month','weekday'], how='left')

    for i in range(1,29):
        col = 'p_'+str(i)
        train[col] = train.groupby(['item','corporation','location'])['price'].transform(lambda x : x.shift(lag+i))   
    train = train.drop(['ID','supply','day'],axis=1)

    train['item'] = train['item'].astype('str')
    train['corporation'] = train['corporation'].astype('str')
    train['location'] = train['location'].astype('str')
    train['year'] = train['year'].astype('str')
    train['month'] = train['month'].astype('str')
    train['weekday'] = train['weekday'].astype('str')
    
    train = pd.get_dummies(train)  
    
    train = train.interpolate('values')
    train = train.interpolate('bfill')             
    
    if VALID:
        valid = train.loc[(train.timestamp > train.timestamp.max()-28)]            
        train = train.loc[train.timestamp <= train.timestamp.max()-28]   
    else:
        valid = train.loc[(train.timestamp > train.timestamp.max()-28)]  
        
    return train, valid

In [52]:
def prep_for_test(df, lag, items, VALID=False):
    # 입력dataframe은 train과 test의 통합데이터
    test = df.copy()
    
    if 4 in items :
        # 감귤은 무역수지와 무관
        test = test.drop(trade_columns,axis=1)
        
        df1= train_full.groupby(['item','corporation','month','day']).mean().reset_index()[['item','corporation','month','day','price']].rename(columns={'price':'mean_m_price'})
        test = pd.merge(test,df1, on=['item','corporation','month','day'], how='left')
        df1= train_full.groupby(['item','corporation','weekday']).mean().reset_index()[['item','corporation','weekday','price']].rename(columns={'price':'mean_w_price'})
        test = pd.merge(test,df1, on=['item','corporation','weekday'], how='left')
        df1= train_full.groupby(['item','corporation','month','weekday']).mean().reset_index()[['item','corporation','month','weekday','price']].rename(columns={'price':'mean_mw_price'})
        test = pd.merge(test,df1, on=['item','corporation','month','weekday'], how='left')
        df1= train_full.groupby(['item','month','day']).mean().reset_index()[['item','month','day','price']].rename(columns={'price':'mean_im_price'})
        test = pd.merge(test,df1, on=['item','month','day'], how='left')
        df1= train_full.groupby(['item','month','weekday']).mean().reset_index()[['item','month','weekday','price']].rename(columns={'price':'mean_imw_price'})
        test = pd.merge(test,df1, on=['item','month','weekday'], how='left')
        df1= train_full.groupby(['corporation','month','day']).mean().reset_index()[['corporation','month','day','price']].rename(columns={'price':'mean_cm_price'})
        test = pd.merge(test,df1, on=['corporation','month','day'], how='left')
        df1= train_full.groupby(['corporation','month','weekday']).mean().reset_index()[['corporation','month','weekday','price']].rename(columns={'price':'mean_cmw_price'})
        test = pd.merge(test,df1, on=['corporation','month','weekday'], how='left')     
    elif 0 in items:
        # 무역수지 반영
        for col in trade_columns:
            test[col] = test.groupby(['item','corporation','location'])[col].transform(lambda x : x.shift(lag+1) )
            
        df1= train_full.groupby(['item','corporation','month','day']).mean().reset_index()[['item','corporation','month','day','price']].rename(columns={'price':'mean_m_price'})
        test = pd.merge(test,df1, on=['item','corporation','month','day'], how='left')
        df1= train_full.groupby(['item','corporation','weekday']).mean().reset_index()[['item','corporation','weekday','price']].rename(columns={'price':'mean_w_price'})
        test = pd.merge(test,df1, on=['item','corporation','weekday'], how='left')
        df1= train_full.groupby(['item','corporation','month','weekday']).mean().reset_index()[['item','corporation','month','weekday','price']].rename(columns={'price':'mean_mw_price'})
        test = pd.merge(test,df1, on=['item','corporation','month','weekday'], how='left')
        df1= train_full.groupby(['item','month','day']).mean().reset_index()[['item','month','day','price']].rename(columns={'price':'mean_im_price'})
        test = pd.merge(test,df1, on=['item','month','day'], how='left')
        df1= train_full.groupby(['item','month','weekday']).mean().reset_index()[['item','month','weekday','price']].rename(columns={'price':'mean_imw_price'})
        test = pd.merge(test,df1, on=['item','month','weekday'], how='left')
        df1= train_full.groupby(['corporation','month','day']).mean().reset_index()[['corporation','month','day','price']].rename(columns={'price':'mean_cm_price'})
        test = pd.merge(test,df1, on=['corporation','month','day'], how='left')
        df1= train_full.groupby(['corporation','month','weekday']).mean().reset_index()[['corporation','month','weekday','price']].rename(columns={'price':'mean_cmw_price'})
        test = pd.merge(test,df1, on=['corporation','month','weekday'], how='left')          
    else:
        # 무역수지 반영
        for col in trade_columns:
            test[col] = test.groupby(['item','corporation','location'])[col].transform(lambda x : x.shift(lag+1) )
            
        df1= train_full.groupby(['item','corporation','location','month','day']).mean().reset_index()[['item','corporation','location','month','day','price']].rename(columns={'price':'mean_m_price'})
        test = pd.merge(test,df1, on=['item','corporation','location','month','day'], how='left')
        df1= train_full.groupby(['item','corporation','location','weekday']).mean().reset_index()[['item','corporation','location','weekday','price']].rename(columns={'price':'mean_w_price'})
        test = pd.merge(test,df1, on=['item','corporation','location','weekday'], how='left')
        df1= train_full.groupby(['item','corporation','location','month','weekday']).mean().reset_index()[['item','corporation','location','month','weekday','price']].rename(columns={'price':'mean_mw_price'})
        test = pd.merge(test,df1, on=['item','corporation','location','month','weekday'], how='left')
        df1= train_full.groupby(['item','month','day']).mean().reset_index()[['item','month','day','price']].rename(columns={'price':'mean_im_price'})
        test = pd.merge(test,df1, on=['item','month','day'], how='left')
        df1= train_full.groupby(['item','month','weekday']).mean().reset_index()[['item','month','weekday','price']].rename(columns={'price':'mean_imw_price'})
        test = pd.merge(test,df1, on=['item','month','weekday'], how='left')

    for i in range(1,29):
        col = 'p_'+str(i)
        test[col] = test.groupby(['item','corporation','location'])['price'].transform(lambda x : x.shift(lag+i))   
    test = test.drop(['ID','supply','day'],axis=1)

    test['item'] = test['item'].astype('str')
    test['corporation'] = test['corporation'].astype('str')
    test['location'] = test['location'].astype('str')
    test['year'] = test['year'].astype('str')
    test['month'] = test['month'].astype('str')
    test['weekday'] = test['weekday'].astype('str')
    
    test = pd.get_dummies(test)  
    
    test = test.interpolate('values')
    test = test.interpolate('bfill')             
    
    test = test.loc[test.timestamp > test.timestamp.max()-28]

    return test

In [53]:
def RMSE(y, y_pred):
    return mean_squared_error(y, y_pred)**0.5

## 감귤

In [54]:
items = [4]
train_data = train_full.loc[train_full['item'].isin(items)]
valid_data = train_data.loc[train_data.timestamp > train_data.timestamp.max()-28]    

In [55]:
def xgb_model(n_estimators, max_depth, colsample_bytree, subsample, seed, VALID=False):
    model_xgb = []
    print('n_estimators %d , max_depth %d , colsample_bytree %d , subsample %d , seed %d '%(n_estimators, max_depth, colsample_bytree, subsample, seed))
    for lag in range(28):
        print('lag : ',lag)
        train, valid = prep_for_train(train_data, lag, items,VALID=VALID)
        train = train.drop(['timestamp'],axis=1)  
        valid = valid.drop(['timestamp'],axis=1)      

        X_train = train.drop('price',axis=1)
        y_train = train['price']
        X_valid = valid.drop('price',axis=1)
        y_valid = valid['price']    
        xgb_reg = XGBRegressor(n_estimators = n_estimators,  max_depth = max_depth, colsample_bytree = colsample_bytree,
                               subsample = subsample, seed=seed)
        xgb_reg.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_valid, y_valid)],
                early_stopping_rounds=100,
               verbose=False)
        model_xgb.append(xgb_reg)
    return model_xgb

model_xgb = []
model_xgb.append(xgb_model(n_estimators = 200,  max_depth = 6, colsample_bytree = 0.7, subsample = 0.7, seed=0))
model_xgb.append(xgb_model(n_estimators = 400,  max_depth = 7, colsample_bytree = 0.7, subsample = 0.7, seed=0))
model_xgb.append(xgb_model(n_estimators = 300,  max_depth = 7, colsample_bytree = 0.8, subsample = 0.8, seed=0))    

n_estimators 200 , max_depth 6 , colsample_bytree 0 , subsample 0 , seed 0 
lag :  0




lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27
n_estimators 400 , max_depth 7 , colsample_bytree 0 , subsample 0 , seed 0 
lag :  0
lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27
n_estimators 300 , max_depth 7 , colsample_bytree 0 , subsample 0 , seed 0 
lag :  0
lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27


In [56]:
train_data

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,c0,c1,c2,...,c29,c30,c31,c32,c33,year,month,day,weekday,holiday
0,TG_A_J_20190101,0,4,0,0,0.0,0.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,1,1,1
1,TG_A_J_20190102,1,4,0,0,0.0,0.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,2,2,0
2,TG_A_J_20190103,2,4,0,0,60601.0,1728.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,3,3,0
3,TG_A_J_20190104,3,4,0,0,25000.0,1408.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,4,4,0
4,TG_A_J_20190105,4,4,0,0,32352.0,1250.0,172.0,-70.0,0.0,...,0.0,0.0,990.0,-4461.0,-123.0,2019,1,5,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15225,TG_E_S_20230227,1518,4,4,1,24204.0,3418.0,-64.0,-867.0,0.0,...,106.0,3.0,0.0,-4890.0,-71.0,2023,2,27,0,0
15226,TG_E_S_20230228,1519,4,4,1,13587.0,3141.0,-64.0,-867.0,0.0,...,106.0,3.0,0.0,-4890.0,-71.0,2023,2,28,1,0
15227,TG_E_S_20230301,1520,4,4,1,16187.0,4235.0,,,,...,,,,,,2023,3,1,2,1
15228,TG_E_S_20230302,1521,4,4,1,17830.0,3960.0,,,,...,,,,,,2023,3,2,3,0


In [None]:
test_input = test_full.loc[test_full['item'].isin(items)]
test_data = test_input.loc[test_input.timestamp > test_input.timestamp.max()-28]

models = model_xgb
base = test_data.timestamp.min()
test_data['pred'] = 0
for model_l in models:
    for lag in range(28):
        print('lag : ',lag)
        test = prep_for_test(test_input, lag, items)
       
        model = model_l[lag]

        test = test.loc[test.timestamp == base+lag]
        test = test.drop(['timestamp'],axis=1)      

        X_test = test.drop('price',axis=1)
        y_test = test['price']

        pred = model.predict(X_test)
        test_data.loc[test_data.timestamp == base+lag,'pred'] \
            = test_data.loc[test_data.timestamp == base+lag,'pred'] + pred*1/len(models)

    test_data.loc[test_data.weekday==6, 'pred'] = 0

for i in test_data.item.unique():
    for j in test_data.corporation.unique():
        for k in test_data.location.unique():       
            # 이전 기간에 0이 많으면 예측가격도 0으로 처리         
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:]) == 0:
                test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = 0      
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:] == 0) > 23:
                test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = 0                  

# 일요일 가격은 0으로 처리
test_data.loc[test_data.weekday==6, 'pred'] = 0    

for i in test_data.item.unique():
    for j in test_data.corporation.unique():
        for k in test_data.location.unique():
            print('item : ',i,'corpolation : ',j,'location : ', k)
            plt.figure(figsize=(10,3))
            plt.plot(valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].timestamp,
                     valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].price)
            plt.plot(test_data.loc[(test_data.item==i) &
                (test_data.corporation==j) & (test_data.location==k)].timestamp,
                     test_data.loc[(test_data.item==i) &
                (test_data.corporation==j) & (test_data.location==k)].pred)
            plt.show()

In [58]:
submission = pd.read_csv(path+'sample_submission.csv')
submission = pd.concat([submission, test_full.loc[test_full.timestamp > test_full.timestamp.max()-28].reset_index(drop=True)[['timestamp','item','corporation','location']]], axis=1)
submission.loc[submission['item'].isin(items),'answer'] = test_data['pred'].values
submission.loc[submission['item'].isin(items)]

Unnamed: 0,ID,answer,timestamp,item,corporation,location
0,TG_A_J_20230304,3757.690918,1523,4,0,0
1,TG_A_J_20230305,0.000000,1524,4,0,0
2,TG_A_J_20230306,3749.636963,1525,4,0,0
3,TG_A_J_20230307,3290.240601,1526,4,0,0
4,TG_A_J_20230308,3322.712891,1527,4,0,0
...,...,...,...,...,...,...
275,TG_E_S_20230327,4675.059204,1546,4,4,1
276,TG_E_S_20230328,4218.586548,1547,4,4,1
277,TG_E_S_20230329,5267.090942,1548,4,4,1
278,TG_E_S_20230330,5065.769897,1549,4,4,1


In [None]:
# 과거 동월 데이터 형태와 비교
for i in [4]:
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():
            plt.figure(figsize=(10,3))
            print('item : ',i,'corpolation : ',j,'location : ', k)            
            train_data.loc[(train_data.item==4) & (train_data.corporation==j) & (train_data.location==k) & 
                           (train_data.month==3)  ].groupby(['month','day']).mean()['price'].plot()
            plt.show()

In [None]:
for i in [4]:
    for j in test_data.corporation.unique():
        for k in test_data.location.unique():
            print('item : ',i,'corpolation : ',j,'location : ', k)
            plt.figure(figsize=(10,3))
            plt.plot(valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].timestamp,
                     valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].price)

            plt.plot(submission.loc[(submission.item==i) &
                (submission.corporation==j) & (submission.location==k)].timestamp,
                     submission.loc[(submission.item==i) &
                (submission.corporation==j) & (submission.location==k)].answer)  
            plt.show()

In [61]:
submission.to_csv('submit_TG.csv',index=False)

### validation

In [62]:
items = [4]
train_data = train_full.loc[train_full['item'].isin(items)]
valid_data = train_data.loc[train_data.timestamp > train_data.timestamp.max()-28]    

model_xgb = []
model_xgb.append(xgb_model(n_estimators = 200,  max_depth = 6, colsample_bytree = 0.7, subsample = 0.7, seed=0, VALID=True))

n_estimators 200 , max_depth 6 , colsample_bytree 0 , subsample 0 , seed 0 
lag :  0




lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27


In [63]:
models = model_xgb
base = valid_data.timestamp.min()
valid_data['pred'] = 0
for model_l in models:
    for lag in range(28):
        print('lag : ',lag)
        train, valid = prep_for_train(train_data, lag, items,VALID=True)

        model = model_l[lag]

        valid = valid.loc[valid.timestamp == base+lag]
        valid = valid.drop(['timestamp'],axis=1)      

        X_valid = valid.drop('price',axis=1)
        y_valid = valid['price']

        pred = model.predict(X_valid)
        valid_data.loc[valid_data.timestamp == base+lag,'pred'] \
            = valid_data.loc[valid_data.timestamp == base+lag,'pred'] + pred*1/len(models)
        valid_data.loc[valid_data.weekday==6, 'pred'] = 0   

for i in train_data.item.unique():
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():          
            if len(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values) ==0:
                break
                    
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:]) == 0:
                valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = 0       
                
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:] == 0) > 23:
                valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = 0                  
          
valid_data.loc[valid_data.weekday==6, 'pred'] = 0    
valid_data.loc[valid_data.pred < 0,'pred'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_data['pred'] = 0


lag :  0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27


In [64]:
eval_data = valid_data.loc[valid_data.timestamp < base+ 28].copy()
print(np.sqrt(mean_squared_error(eval_data.price, eval_data.pred)))

public_data = eval_data.loc[eval_data.timestamp < base+ 14]
print(np.sqrt(mean_squared_error(public_data.price, public_data.pred)))

1013.293131418599
932.094229085444


In [65]:
eval_data

Unnamed: 0,ID,timestamp,item,corporation,location,supply,price,c0,c1,c2,...,c30,c31,c32,c33,year,month,day,weekday,holiday,pred
1495,TG_A_J_20230204,1495,4,0,0,30595.0,1864.0,-64.0,-867.0,0.0,...,3.0,0.0,-4890.0,-71.0,2023,2,4,5,0,1991.585693
1496,TG_A_J_20230205,1496,4,0,0,0.0,0.0,-64.0,-867.0,0.0,...,3.0,0.0,-4890.0,-71.0,2023,2,5,6,1,0.000000
1497,TG_A_J_20230206,1497,4,0,0,17973.0,1837.0,-64.0,-867.0,0.0,...,3.0,0.0,-4890.0,-71.0,2023,2,6,0,0,1956.214355
1498,TG_A_J_20230207,1498,4,0,0,10665.0,1595.0,-64.0,-867.0,0.0,...,3.0,0.0,-4890.0,-71.0,2023,2,7,1,0,1935.163696
1499,TG_A_J_20230208,1499,4,0,0,8866.0,1747.0,-64.0,-867.0,0.0,...,3.0,0.0,-4890.0,-71.0,2023,2,8,2,0,2398.573486
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15225,TG_E_S_20230227,1518,4,4,1,24204.0,3418.0,-64.0,-867.0,0.0,...,3.0,0.0,-4890.0,-71.0,2023,2,27,0,0,2717.498779
15226,TG_E_S_20230228,1519,4,4,1,13587.0,3141.0,-64.0,-867.0,0.0,...,3.0,0.0,-4890.0,-71.0,2023,2,28,1,0,3140.773926
15227,TG_E_S_20230301,1520,4,4,1,16187.0,4235.0,,,,...,,,,,2023,3,1,2,1,2716.612793
15228,TG_E_S_20230302,1521,4,4,1,17830.0,3960.0,,,,...,,,,,2023,3,2,3,0,3470.535156


In [None]:
valid_tot= valid_data.loc[valid_data.timestamp < base+ 28].reset_index(drop=True)
for i in valid_tot.item.unique():
    for j in valid_tot.corporation.unique():
        for k in valid_tot.location.unique():
            if len(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values) ==0:
                break            
            print('item : ',i,'corpolation : ',j,'location : ', k)
            print('MAE SCORE : ', RMSE(valid_tot.loc[(valid_tot.item==i) & (valid_tot.corporation==j) & (valid_tot.location==k)].price.values, \
                                       valid_tot.loc[(valid_tot.item==i) & (valid_tot.corporation==j) & (valid_tot.location==k)].pred.values))
    
            plt.figure(figsize=(10,2))
            plt.plot(valid_tot.loc[(valid_tot.item==i) &
                (valid_tot.corporation==j) & (valid_tot.location==k)].reset_index(drop=True).price)
            plt.plot(valid_tot.loc[(valid_tot.item==i) &
                (valid_tot.corporation==j) & (valid_tot.location==k)].reset_index(drop=True).pred)
            plt.show()

## 브로코리

In [67]:
items = [0]
train_data = train_full.loc[train_full['item'].isin(items)]
valid_data = train_data.loc[train_data.timestamp > train_data.timestamp.max()-28]    

In [68]:
model_lr = []
base = valid_data.timestamp.min()
for lag in range(28):
    print('lag : ',lag)
    train, valid = prep_for_train(train_data, lag, items)
#     train = train.dropna()
    train = train.fillna(0)
    train = train.drop(['timestamp'],axis=1)  
    valid = valid.fillna(0)
    valid = valid.drop(['timestamp'],axis=1)      
    
    X_train = train.drop('price',axis=1)
    y_train = train['price']
    X_valid = valid.drop('price',axis=1)
    y_valid = valid['price']    
    lmodel = LinearRegression()
    lmodel.fit(X_train, y_train)
    model_lr.append(lmodel)

lag :  0
lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27


In [None]:
test_input = test_full.loc[test_full['item'].isin(items)]
test_data = test_input.loc[test_input.timestamp > test_input.timestamp.max()-28]

models = [model_lr]
base = test_data.timestamp.min()
test_data['pred'] = 0
for model_l in models:
    for lag in range(28):
        print('lag : ',lag)
        test = prep_for_test(test_input, lag, items)
        
        model = model_l[lag]

        test = test.loc[test.timestamp == base+lag]
        test = test.drop(['timestamp'],axis=1)      

        X_test = test.drop('price',axis=1)
        y_test = test['price']

        pred = model.predict(X_test)
        test_data.loc[test_data.timestamp == base+lag,'pred'] \
            = test_data.loc[test_data.timestamp == base+lag,'pred'] + pred*1/len(models)

    test_data.loc[test_data.weekday==6, 'pred'] = 0

for i in test_data.item.unique():
    for j in test_data.corporation.unique():
        for k in test_data.location.unique():          
            if len(test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k)].price.values) ==0:
                break
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:]) == 0:
                test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = 0       
                
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:] == 0) > 23:
                test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = 0                  
          
test_data.loc[test_data.weekday==6, 'pred'] = 0    
test_data.loc[test_data.pred < 0,'pred'] = 0


for i in test_data.item.unique():
    for j in test_data.corporation.unique():
        for k in test_data.location.unique():
            print('item : ',i,'corpolation : ',j,'location : ', k)
            plt.figure(figsize=(10,3))
            plt.plot(valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].timestamp,
                     valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].price)
            plt.plot(test_data.loc[(test_data.item==i) &
                (test_data.corporation==j) & (test_data.location==k)].timestamp,
                     test_data.loc[(test_data.item==i) &
                (test_data.corporation==j) & (test_data.location==k)].pred)
            plt.show()

In [None]:
# 과거 동월 데이터 형태와 비교
for i in items:
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():
            plt.figure(figsize=(10,3))
            print('item : ',i,'corpolation : ',j,'location : ', k)            
            train_data.loc[(train_data.item.isin(items)) & (train_data.corporation==j) & (train_data.location==k) & 
                           ((train_data.month==3) | (train_data.month==2))].groupby(['month','day']).mean()['price'].plot()
            plt.show()

In [71]:
submission = pd.read_csv(path+'sample_submission.csv')
submission = pd.concat([submission, test_full.loc[test_full.timestamp > test_full.timestamp.max()-28].reset_index(drop=True)[['timestamp','item','corporation','location']]], axis=1)
submission.loc[submission['item'].isin(items),'answer'] = test_data['pred'].values
submission.loc[submission['item'].isin(items)]

Unnamed: 0,ID,answer,timestamp,item,corporation,location
784,BC_A_J_20230304,3357.596055,1523,0,0,0
785,BC_A_J_20230305,0.000000,1524,0,0,0
786,BC_A_J_20230306,2921.025790,1525,0,0,0
787,BC_A_J_20230307,2893.853721,1526,0,0,0
788,BC_A_J_20230308,2922.065765,1527,0,0,0
...,...,...,...,...,...,...
1031,BC_E_S_20230327,3201.605270,1546,0,4,1
1032,BC_E_S_20230328,3132.510213,1547,0,4,1
1033,BC_E_S_20230329,3070.934736,1548,0,4,1
1034,BC_E_S_20230330,3756.902440,1549,0,4,1


In [72]:
submission.to_csv('submit_BC.csv',index=False)

### validation

In [73]:
items = [0]
train_data = train_full.loc[train_full['item'].isin(items)]
valid_data = train_data.loc[train_data.timestamp > train_data.timestamp.max()-28]    

model_lr = []
base = valid_data.timestamp.min()
for lag in range(28):
    print('lag : ',lag)
    train, valid = prep_for_train(train_data, lag, items, VALID=True)
#     train = train.dropna()
    train = train.fillna(0)
    train = train.drop(['timestamp'],axis=1)  
    valid = valid.fillna(0)
    valid = valid.drop(['timestamp'],axis=1)      
    
    X_train = train.drop('price',axis=1)
    y_train = train['price']
    X_valid = valid.drop('price',axis=1)
    y_valid = valid['price']    
    lmodel = LinearRegression()
    lmodel.fit(X_train, y_train)
    model_lr.append(lmodel)

lag :  0
lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27


In [None]:
models = [model_lr]
base = valid_data.timestamp.min()
valid_data['pred'] = 0
for model_l in models:
    for lag in range(28):
        print('lag : ',lag)
        train, valid = prep_for_train(train_data, lag, items,VALID=True)

        model = model_l[lag]

        valid = valid.loc[valid.timestamp == base+lag]
        valid = valid.drop(['timestamp'],axis=1)      

        X_valid = valid.drop('price',axis=1)
        y_valid = valid['price']

        pred = model.predict(X_valid)
        valid_data.loc[valid_data.timestamp == base+lag,'pred'] \
            = valid_data.loc[valid_data.timestamp == base+lag,'pred'] + pred*1/len(models)
        valid_data.loc[valid_data.weekday==6, 'pred'] = 0   

for i in train_data.item.unique():
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():          
            if len(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values) ==0:
                break                    
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:]) == 0:
                valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = 0       
                
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:] == 0) > 23:
                valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = 0                  
          
valid_data.loc[valid_data.weekday==6, 'pred'] = 0    
valid_data.loc[valid_data.pred < 0,'pred'] = 0

In [75]:
eval_data = valid_data.loc[valid_data.timestamp < base+ 28].copy()
print(np.sqrt(mean_squared_error(eval_data.price, eval_data.pred)))

public_data = eval_data.loc[eval_data.timestamp < base+ 14]
print(np.sqrt(mean_squared_error(public_data.price, public_data.pred)))

481.59418516069496
456.1552720776465


In [None]:
valid_tot= valid_data.loc[valid_data.timestamp < base+ 28].reset_index(drop=True)
for i in valid_tot.item.unique():
    for j in valid_tot.corporation.unique():
        for k in valid_tot.location.unique():
            if len(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values) ==0:
                break            
            print('item : ',i,'corpolation : ',j,'location : ', k)
            print('MAE SCORE : ', RMSE(valid_tot.loc[(valid_tot.item==i) & (valid_tot.corporation==j) & (valid_tot.location==k)].price.values, \
                                       valid_tot.loc[(valid_tot.item==i) & (valid_tot.corporation==j) & (valid_tot.location==k)].pred.values))
    
            plt.figure(figsize=(10,2))
            plt.plot(valid_tot.loc[(valid_tot.item==i) &
                (valid_tot.corporation==j) & (valid_tot.location==k)].reset_index(drop=True).price)
            plt.plot(valid_tot.loc[(valid_tot.item==i) &
                (valid_tot.corporation==j) & (valid_tot.location==k)].reset_index(drop=True).pred)
            plt.show()

## 기타 작물

In [77]:
items = [1,2,3]
train_data = train_full.loc[train_full['item'].isin(items)]
valid_data = train_data.loc[train_data.timestamp > train_data.timestamp.max()-28]    

In [78]:
model_rf = []
base = valid_data.timestamp.min()
for lag in range(28):
    print('lag : ',lag)
    train, valid = prep_for_train(train_data, lag, items)
    train = train.fillna(0)
    train = train.drop(['timestamp'],axis=1)  
    valid = valid.fillna(0)
    valid = valid.drop(['timestamp'],axis=1)      
    
    X_train = train.drop('price',axis=1)
    y_train = train['price']
    X_valid = valid.drop('price',axis=1)
    y_valid = valid['price']    
    model = RandomForestRegressor(n_estimators=200,
                                random_state=42,
                                max_depth = 7,
                                n_jobs=-1)
    model.fit(X_train, y_train)
    model_rf.append(model)
    
valid_data.loc[valid_data.weekday=='6', 'pred'] = 0

lag :  0
lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
test_input = test_full.loc[test_full['item'].isin(items)]
test_data = test_input.loc[test_input.timestamp > test_input.timestamp.max()-28]

models = [model_rf]
base = test_data.timestamp.min()
test_data['pred'] = 0
for model_l in models:
    for lag in range(28):
        print('lag : ',lag)
        test = prep_for_test(test_input, lag, items)

        model = model_l[lag]

        test = test.loc[test.timestamp == base+lag]
        test = test.drop(['timestamp'],axis=1)      

        X_test = test.drop('price',axis=1)
        y_test = test['price']

        pred = model.predict(X_test)
        test_data.loc[test_data.timestamp == base+lag,'pred'] \
            = test_data.loc[test_data.timestamp == base+lag,'pred'] + pred*1/len(models)

    test_data.loc[test_data.weekday==6, 'pred'] = 0

for i in test_data.item.unique():
    for j in test_data.corporation.unique():
        for k in test_data.location.unique():          
            if len(test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k)].price.values) ==0:
                break

            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:]) == 0:
                test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = 0       
                
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:] == 0) > 23:
                test_data.loc[(test_data.item==i) & (test_data.corporation==j) & (test_data.location==k),'pred'] = 0                  
          
test_data.loc[test_data.weekday==6, 'pred'] = 0    
test_data.loc[test_data.pred < 0,'pred'] = 0

In [None]:
for i in test_data.item.unique():
    for j in test_data.corporation.unique():
        for k in test_data.location.unique():
            print('item : ',i,'corpolation : ',j,'location : ', k)
            plt.figure(figsize=(10,3))
            plt.plot(valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].timestamp,
                     valid_data.loc[(valid_data.item==i) &
                (valid_data.corporation==j) & (valid_data.location==k)].price)
            plt.plot(test_data.loc[(test_data.item==i) &
                (test_data.corporation==j) & (test_data.location==k)].timestamp,
                     test_data.loc[(test_data.item==i) &
                (test_data.corporation==j) & (test_data.location==k)].pred)
            plt.show()

submission = pd.read_csv(path+'sample_submission.csv')
submission = pd.concat([submission, test_full.loc[test_full.timestamp > test_full.timestamp.max()-28].reset_index(drop=True)[['timestamp','item','corporation','location']]], axis=1)
submission.loc[submission['item'].isin(items),'answer'] = test_data['pred'].values

In [None]:
# 과거 동월 데이터 형태와 비교
for i in items:
    for j in train_data.corporation.unique():
        for k in train_data.location.unique():
            plt.figure(figsize=(10,3))
            print('item : ',i,'corpolation : ',j,'location : ', k)            
            train_data.loc[(train_data.item.isin(items)) & (train_data.corporation==j) & (train_data.location==k) & 
                           ((train_data.month==3) | (train_data.month==2))].groupby(['month','day']).mean()['price'].plot()
            plt.show()

In [82]:
submission.to_csv('submit_ETC.csv',index=False)

### validation

In [83]:
items = [1,2,3]
train_data = train_full.loc[train_full['item'].isin(items)]
valid_data = train_data.loc[train_data.timestamp > train_data.timestamp.max()-28]   

model_rf = []
base = valid_data.timestamp.min()
for lag in range(28):
    print('lag : ',lag)
    train, valid = prep_for_train(train_data, lag, items,True)
#     train = train.dropna()
    train = train.fillna(0)
    train = train.drop(['timestamp'],axis=1)  
    valid = valid.fillna(0)
    valid = valid.drop(['timestamp'],axis=1)      
    
    X_train = train.drop('price',axis=1)
    y_train = train['price']
    X_valid = valid.drop('price',axis=1)
    y_valid = valid['price']    
    model = RandomForestRegressor(n_estimators=200,
                                random_state=42,
                                max_depth = 7,
                                n_jobs=-1)
    model.fit(X_train, y_train)
    model_rf.append(model)
    
valid_data.loc[valid_data.weekday==6, 'pred'] = 0

lag :  0
lag :  1
lag :  2
lag :  3
lag :  4
lag :  5
lag :  6
lag :  7
lag :  8
lag :  9
lag :  10
lag :  11
lag :  12
lag :  13
lag :  14
lag :  15
lag :  16
lag :  17
lag :  18
lag :  19
lag :  20
lag :  21
lag :  22
lag :  23
lag :  24
lag :  25
lag :  26
lag :  27


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
models = [model_rf] #, model_lgbm, model_lr, model_cat, model_rf] 
base = valid_data.timestamp.min()
valid_data['pred'] = 0
for model_l in models:
    for lag in range(28):
        print('lag : ',lag)
        train, valid = prep_for_train(train_data, lag, items)
        train = train.dropna()

        X_train = train.drop('price',axis=1)
        y_train = train['price']

        model = model_l[lag]

        valid = valid.loc[valid.timestamp == base+lag]
        valid = valid.drop(['timestamp'],axis=1)      

        X_valid = valid.drop('price',axis=1)
        y_valid = valid['price']

        pred = model.predict(X_valid)
        valid_data.loc[valid_data.timestamp == base+lag,'pred'] \
            = valid_data.loc[valid_data.timestamp == base+lag,'pred'] + pred*1/len(models)

    valid_data.loc[valid_data.weekday==6, 'pred'] = 0

In [85]:
for i in valid_data.item.unique():
    for j in valid_data.corporation.unique():
        for k in valid_data.location.unique():                              
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:]) == 0:
                valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = 0                     
            if np.sum(train_data.loc[(train_data.item==i) & (train_data.corporation==j) & (train_data.location==k),'price'].values[-28:] == 0) > 23:
                valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k),'pred'] = 0                  

valid_data.loc[valid_data.weekday==6, 'pred'] = 0   
valid_data['pred'] = valid_data.apply(lambda x : 0 if (x['item']==1) & (x.weekday==5) & ((x.corporation==3) | (x.corporation==4)) else x.pred ,axis=1)
valid_data['pred'] = valid_data.apply(lambda x : 0 if (x['item']==2) & (x.weekday in [4,5]) & (x.corporation==4) & (x.location=='1') else x.pred ,axis=1)
valid_data['pred'] = valid_data.apply(lambda x : 0 if (x['item']==3) & (x.weekday in [3]) & (x.corporation==4) & (x.location=='0') else x.pred ,axis=1)
# valid_data['pred'] = valid_data.apply(lambda x : 0 if (x['item']=='1') & (x.weekday in ['5']) & (x.corporation=='0') & (x.location=='0') else x.pred ,axis=1)

valid_data.loc[valid_data.pred < 0,'pred'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_data['pred'] = valid_data.apply(lambda x : 0 if (x['item']==1) & (x.weekday==5) & ((x.corporation==3) | (x.corporation==4)) else x.pred ,axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_data['pred'] = valid_data.apply(lambda x : 0 if (x['item']==2) & (x.weekday in [4,5]) & (x.corporation==4) & (x.location=='1') else x.pred ,axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: htt

In [86]:
eval_data = valid_data.loc[valid_data['item'].isin(items)].copy()
print(np.sqrt(mean_squared_error(eval_data.price, eval_data.pred)))

public_data = eval_data.loc[eval_data.timestamp < base+ 14]
print(np.sqrt(mean_squared_error(public_data.price, public_data.pred)))

381.2753124381147
383.73428180713927


In [None]:
valid_tot= valid_data.reset_index(drop=True)
for i in valid_tot.item.unique():
    for j in valid_tot.corporation.unique():
        for k in valid_tot.location.unique():
            if len(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values) ==0:
                break            
            print('item : ',i,'corpolation : ',j,'location : ', k)
            print('MAE SCORE : ', RMSE(valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].price.values, valid_data.loc[(valid_data.item==i) & (valid_data.corporation==j) & (valid_data.location==k)].pred.values))
    
            plt.figure(figsize=(10,2))
            plt.plot(valid_tot.loc[(valid_tot.item==i) &
                (valid_tot.corporation==j) & (valid_tot.location==k)].reset_index(drop=True).price)
            plt.plot(valid_tot.loc[(valid_tot.item==i) &
                (valid_tot.corporation==j) & (valid_tot.location==k)].reset_index(drop=True).pred)
            plt.show()

# 제출

In [115]:
submission = pd.read_csv(path+'sample_submission.csv')
submission = pd.concat([submission, test_full.loc[test_full.timestamp > test_full.timestamp.max()-28].reset_index(drop=True)[['timestamp','item','corporation','location']]], axis=1)
submission

Unnamed: 0,ID,answer,timestamp,item,corporation,location
0,TG_A_J_20230304,0,1523,4,0,0
1,TG_A_J_20230305,0,1524,4,0,0
2,TG_A_J_20230306,0,1525,4,0,0
3,TG_A_J_20230307,0,1526,4,0,0
4,TG_A_J_20230308,0,1527,4,0,0
...,...,...,...,...,...,...
1087,RD_F_J_20230327,0,1546,3,5,0
1088,RD_F_J_20230328,0,1547,3,5,0
1089,RD_F_J_20230329,0,1548,3,5,0
1090,RD_F_J_20230330,0,1549,3,5,0


In [116]:
submission_dl = pd.read_csv(path+'submit_dl.csv')

In [117]:
submission_TG = pd.read_csv(path+'submit_TG.csv')
submission.loc[submission['item']==4,'answer'] = submission_TG.loc[submission_TG['item']==4,'answer'].values
submission_BC = pd.read_csv(path+'submit_BC.csv')
submission.loc[submission['item']==0,'answer'] = submission_BC.loc[submission_BC['item']==0,'answer'].values
submission_ETC = pd.read_csv(path+'submit_ETC.csv')
submission.loc[submission['item'].isin([1,2,3]),'answer'] = submission_ETC.loc[submission_ETC['item'].isin([1,2,3]),'answer'].values 

In [118]:
base = submission.timestamp.min()
# 전반은 중단기 시계열 예측에 적합한 DLinear모델의 비중을 높게 설정
submission.loc[(submission.timestamp < base+14),'answer'] \
    = submission.loc[(submission.timestamp < base+14) ,'answer'].values*0.3 \
    + submission_dl.loc[(submission.timestamp < base+14) ,'answer'].values*0.7
# 하반은 multi variable 반영이 가능한 머신러닝 모델의 비중을 높게 설정
submission.loc[submission.timestamp >= base+14,'answer'] \
    = submission.loc[submission.timestamp >= base+14,'answer'].values*0.8 \
    + submission_dl.loc[submission.timestamp >= base+14,'answer'].values*0.2

In [None]:
for i in submission.item.unique():
    for j in submission.corporation.unique():
        for k in submission.location.unique():
            plt.figure(figsize=(10,3))
            print('item : ',i,'corpolation : ',j,'location : ', k)
            plt.plot(submission.loc[(submission.item==i) &
                (submission.corporation==j) & (submission.location==k)].timestamp,
                     submission.loc[(submission.item==i) &
                (submission.corporation==j) & (submission.location==k)].answer)  
            plt.show()

## 파일저장

In [120]:
submission.iloc[:,:2].to_csv(path+'submit.csv',index=False)
submission.iloc[:,:2]

Unnamed: 0,ID,answer
0,TG_A_J_20230304,3371.268715
1,TG_A_J_20230305,0.000000
2,TG_A_J_20230306,3167.927539
3,TG_A_J_20230307,3216.741720
4,TG_A_J_20230308,3072.831657
...,...,...
1087,RD_F_J_20230327,409.878825
1088,RD_F_J_20230328,440.111933
1089,RD_F_J_20230329,413.770154
1090,RD_F_J_20230330,436.582359
