In [1]:
import random
import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
import os

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from math import sqrt

import warnings
warnings.filterwarnings(action='ignore') 

### Declare Global Variables

In [2]:
DATA_PATH  = './data/'
MODEL_PATH = './models/'
SUBMISSION_PATH = './submission/'

TRAIN_SET = 'train.csv'
TEST_SET  = 'test.csv'

### Fixed Random-Seed

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

### Load Data

In [4]:
# train_df = pd.read_csv(DATA_PATH + TRAIN_SET)
# submit_df = pd.read_csv(DATA_PATH + TEST_SET)

### Data Pre-Processing

In [5]:
# #시계열 특성을 학습에 반영하기 위해 timestamp를 월, 일, 시간으로 나눕니다
# train_df['year'] = train_df['timestamp'].apply(lambda x : int(x[0:4]))
# train_df['month'] = train_df['timestamp'].apply(lambda x : int(x[5:7]))
# train_df['day'] = train_df['timestamp'].apply(lambda x : int(x[8:10]))

# submit_df['year'] = submit_df['timestamp'].apply(lambda x : int(x[0:4]))
# submit_df['month'] = submit_df['timestamp'].apply(lambda x : int(x[5:7]))
# submit_df['day'] = submit_df['timestamp'].apply(lambda x : int(x[8:10]))

In [6]:
# #학습에 사용하지 않을 변수들을 제거합니다
# train_df = train_df.drop(columns=['ID', 'supply(kg)'])
# submit_df = submit_df.drop(columns=['ID'])

In [7]:
# #질적 변수들을 수치화합니다
# qual_col = ['item', 'corporation', 'location']

# for i in qual_col:
#     le = LabelEncoder()
#     train_df[i]=le.fit_transform(train_df[i])
#     submit_df[i]=le.transform(submit_df[i]) #test 데이터에 대해서 fit하는 것은 data leakage에 해당합니다

# print('Done.')

### Regression Model Fit

In [9]:
# train_df, test_df, _, _ = train_test_split(train_df, train_df, test_size=0.2, random_state=42)

In [10]:
class LTSF_Linear(torch.nn.Module):
    def __init__(self, window_size, forcast_size, individual, feature_size):
        super(LTSF_Linear, self).__init__()
        self.window_size = window_size
        self.forcast_size = forcast_size
        self.individual = individual
        self.channels = feature_size
        if self.individual:
            self.Linear = torch.nn.ModuleList()
            for i in range(self.channels):
                self.Linear.append(torch.nn.Linear(self.window_size, self.forcast_size))
        else:
            self.Linear = torch.nn.Linear(self.window_size, self.forcast_size)

    def forward(self, x):
        if self.individual:
            output = torch.zeros([x.size(0), self.forcast_size, x.size(2)],dtype=x.dtype).to(x.device)
            for i in range(self.channels):
                output[:,:,i] = self.Linear[i](x[:,:,i])
            x = output
        else:
            x = self.Linear(x.permute(0,2,1)).permute(0,2,1)
        return x
 

In [11]:
class moving_avg(torch.nn.Module):
    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = torch.nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
        x = torch.cat([front, x, end], dim=1)
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x


class series_decomp(torch.nn.Module):
    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        moving_mean = self.moving_avg(x)
        residual = x - moving_mean
        return moving_mean, residual 
        

class LTSF_DLinear(torch.nn.Module):
    def __init__(self, window_size, forcast_size, kernel_size, individual, feature_size):
        super(LTSF_DLinear, self).__init__()
        self.window_size = window_size
        self.forcast_size = forcast_size
        self.decompsition = series_decomp(kernel_size)
        self.individual = individual
        self.channels = feature_size
        if self.individual:
            self.Linear_Seasonal = torch.nn.ModuleList()
            self.Linear_Trend = torch.nn.ModuleList()
            for i in range(self.channels):
                self.Linear_Trend.append(torch.nn.Linear(self.window_size, self.forcast_size))
                self.Linear_Trend[i].weight = torch.nn.Parameter((1/self.window_size)*torch.ones([self.forcast_size, self.window_size]))
                self.Linear_Seasonal.append(torch.nn.Linear(self.window_size, self.forcast_size))
                self.Linear_Seasonal[i].weight = torch.nn.Parameter((1/self.window_size)*torch.ones([self.forcast_size, self.window_size]))
        else:
            self.Linear_Trend = torch.nn.Linear(self.window_size, self.forcast_size)
            self.Linear_Trend.weight = torch.nn.Parameter((1/self.window_size)*torch.ones([self.forcast_size, self.window_size]))
            self.Linear_Seasonal = torch.nn.Linear(self.window_size,  self.forcast_size)
            self.Linear_Seasonal.weight = torch.nn.Parameter((1/self.window_size)*torch.ones([self.forcast_size, self.window_size]))

    def forward(self, x):
        trend_init, seasonal_init = self.decompsition(x)
        trend_init, seasonal_init = trend_init.permute(0,2,1), seasonal_init.permute(0,2,1)
        if self.individual:
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.forcast_size], dtype=trend_init.dtype).to(trend_init.device)
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.forcast_size], dtype=seasonal_init.dtype).to(seasonal_init.device)
            for idx in range(self.channels):
                trend_output[:, idx, :] = self.Linear_Trend[idx](trend_init[:, idx, :])
                seasonal_output[:, idx, :] = self.Linear_Seasonal[idx](seasonal_init[:, idx, :])                
        else:
            trend_output = self.Linear_Trend(trend_init)
            seasonal_output = self.Linear_Seasonal(seasonal_init)
        x = seasonal_output + trend_output
        return x.permute(0,2,1)

In [12]:
def standardization(train_df, test_df, not_col, target):
    train_df_ = train_df.copy()
    test_df_ = test_df.copy()
    submit_df_ = submit_df.copy()
    col =  [col for col in list(train_df.columns) if col not in [not_col]]
    mean_list = []
    std_list = []
    for x in col:
        mean, std = train_df_.agg(["mean", "std"]).loc[:,x]
        mean_list.append(mean)
        std_list.append(std)
        train_df_.loc[:, x] = (train_df_[x] - mean) / std
        test_df_.loc[:, x] = (test_df_[x] - mean) / std
    return train_df_, test_df_, mean_list[col.index(target)], std_list[col.index(target)]

def min_max_scaling(train_df, test_df, not_col, target):
    train_df_ = train_df.copy()
    test_df_ = test_df.copy()
    
    # Exclude 'not_col' from the list of columns
    col = [col for col in list(train_df.columns) if col != not_col]
    
    min_list = []
    max_list = []
    
    for x in col:
        min_val = train_df_[x].min()
        max_val = train_df_[x].max()
        
        min_list.append(min_val)
        max_list.append(max_val)
        
        # Apply Min-Max scaling
        train_df_[x] = (train_df_[x] - min_val) / (max_val - min_val)
        test_df_[x] = (test_df_[x] - min_val) / (max_val - min_val)
    
    return train_df_, test_df_, min_list, max_list




def time_slide_df(df, window_size, forcast_size, date, target):
    df_ = df.copy()
    df_ = df_.reset_index()
    data_list = []
    dap_list = []
    date_list = []
    for idx in range(0, df_.shape[0]-window_size-forcast_size+1):
        x = df_.loc[idx:idx+window_size-1, target].values.reshape(window_size, 1)
        y = df_.loc[idx+window_size:idx+window_size+forcast_size-1, target].values
        date_ = df_.loc[idx+window_size:idx+window_size+forcast_size-1, date].values
        data_list.append(x)
        dap_list.append(y)
        date_list.append(date_)
    return np.array(data_list, dtype='float32'), np.array(dap_list, dtype='float32'), np.array(date_list)


def time_slide_result_df(df, window_size, forcast_size, date, target):
    df_ = df.copy()
    df_ = df_.reset_index()
    data_list = []
    dap_list = []
    date_list = []
    x = df_.loc[-window_size:, target].values.reshape(window_size, 1)
    return np.array(x, dtype='float32')
    

class Data(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.Y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]
        

In [13]:
# ### Univariable ###
# ### 데이터 셋 생성 ###
# window_size = 56
# forcast_size= 28
# batch_size = 32
# targets = 'price(원/kg)'
# date = 'timestamp'

# train_df_fe, test_df_fe, submit_fe, mean_, std_ = standardization(train_df, test_df, submit_df, 'timestamp', targets)
# train_x, train_y, train_date = time_slide_df(train_df_fe, window_size, forcast_size, date, targets)
# test_x, test_y, test_date = time_slide_df(test_df_fe, window_size, forcast_size, date, targets)

# train_ds = Data(train_x[:1000], train_y[:1000])
# valid_ds = Data(train_x[1000:], train_y[1000:])
# test_ds = Data(test_x, test_y)

# train_dl = DataLoader(train_ds, batch_size = batch_size, shuffle=True,)
# valid_dl = DataLoader(valid_ds, batch_size = train_x[1000:].shape[0], shuffle=False)
# test_dl  = DataLoader(test_ds,  batch_size = test_x.shape[0], shuffle=False)


In [14]:
# ### 모델 학습 ###
# train_loss_list = []
# valid_loss_list = []
# test_loss_list = []
# epoch = 50
# lr = 0.001
# DLinear_model = LTSF_DLinear(
#                             window_size=window_size,
#                             forcast_size=forcast_size,
#                             kernel_size=25,
#                             individual=False,
#                             feature_size=1,
#                             )
# criterion = torch.nn.MSELoss()
# optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
# max_loss = 999999999

# for epoch in tqdm(range(1, epoch+1)):
#     loss_list = []
#     DLinear_model.train()
#     for batch_idx, (data, target) in enumerate(train_dl):
#         optimizer.zero_grad()
#         output = DLinear_model(data)
#         loss = criterion(output, target.unsqueeze(-1))
#         loss.backward()
#         optimizer.step()
#         loss_list.append(loss.item())    
#     train_loss_list.append(np.mean(loss_list))

#     DLinear_model.eval()
#     with torch.no_grad():
#         for data, target in valid_dl:
#             output = DLinear_model(data)
#             valid_loss = criterion(output, target.unsqueeze(-1))
#             valid_loss_list.append(valid_loss)
        
#         for data, target in test_dl:
#             output = DLinear_model(data)
#             test_loss = criterion(output, target.unsqueeze(-1))
#             test_loss_list.append(test_loss)

#     if valid_loss < max_loss:
#         torch.save(DLinear_model, 'DLinear_model.pth')
#         max_loss = valid_loss
#         print("valid_loss={:.3f}, test_los{:.3f}, Model Save".format(valid_loss, test_loss))
#         dlinear_best_epoch = epoch
#         dlinear_best_train_loss = np.mean(loss_list)
#         dlinear_best_valid_loss = np.mean(valid_loss.item())
#         dlinear_best_test_loss = np.mean(test_loss.item())

#     print("epoch = {}, train_loss : {:.3f}, valid_loss : {:.3f}, test_loss : {:.3f}".format(epoch, np.mean(loss_list), valid_loss, test_loss))

In [15]:
# ### Validations
# import matplotlib.pyplot as plt

# weights_list = {}
# weights_list['trend'] = DLinear_model.Linear_Trend.weight.detach().numpy()
# weights_list['seasonal'] = DLinear_model.Linear_Seasonal.weight.detach().numpy()

# for name, w in weights_list.items():    
#     fig, ax = plt.subplots()    
#     plt.title(name)
#     im = ax.imshow(w, cmap='plasma_r',)
#     fig.colorbar(im, pad=0.03)
#     plt.show()


In [16]:
def reverse_standardization(data, mean, std):
    """
    정규화된 데이터를 원래 값으로 되돌립니다.
    
    Parameters:
    - data: 정규화된 데이터
    - mean: 평균
    - std: 표준편차
    
    Returns:
    - 원래 값으로 복구된 데이터
    """
    return data * std + mean

def convert_standardization(data, mean, std):
    """
    지정된 평균 및 표준편차 값으로 정규화된 데이터를 진행합니다.
    
    Parameters:
    - data: 정규화된 데이터
    - mean: 평균
    - std: 표준편차
    
    Returns:
    - 지정된 평균 및 표준편차 값으로 정규화된 데이터
    """
    return (data-mean)/std


def reverse_min_max_scaling(data, min_val, max_val):
    """
    Min-Max 스케일링된 데이터를 원래 값으로 되돌립니다.
    
    Parameters:
    - data: Min-Max 스케일링된 데이터
    - min_val: 해당 feature의 최솟값
    - max_val: 해당 feature의 최댓값
    
    Returns:
    - 원래 값으로 복구된 데이터
    """
    return data * (max_val - min_val) + min_val


def convert_min_max_scaling(data, min_val, max_val):
    """
    지정된 최솟값과 최댓값을 사용하여 Min-Max 스케일링을 진행합니다.
    
    Parameters:
    - data: 원래 값의 데이터
    - min_val: 해당 feature의 최솟값
    - max_val: 해당 feature의 최댓값
    
    Returns:
    - Min-Max 스케일링된 데이터
    """
    return (data - min_val) / (max_val - min_val)



### Train & Inference

In [17]:
# train_dataframe = pd.read_csv(DATA_PATH + TRAIN_SET)
# train_dataframe['location'].unique()

In [18]:
# # 출력 제한 해제
# pd.set_option('display.max_columns',None)
# pd.set_option('display.max_rows',None)
# pd.set_option('display.max_seq_items', None)
# pd.options.display.max_colwidth = 100

# train_dataframe.head(10)

In [19]:
# train_dataframe.shape

In [20]:
# train_dataframe = pd.read_csv(DATA_PATH + TRAIN_SET)
# results = []
# lens = 0

# for i in list(train_dataframe['item'].unique()):
#     for k in train_dataframe[train_dataframe['item']==i]['corporation'].unique():
#         for j in ['J','S']:
#             print(f'item : {i} , corporation : {k} , location : {j} ')
#             train_df = train_dataframe[(train_dataframe['item']==i)&\
#                     (train_dataframe['location']==j)&\
#                     (train_dataframe['corporation']==k)]
#             output_result = train_df.copy()
#             lens += len(train_df)
#             if len(train_df) == 0: 
#                 print('gd')
#                 continue

#             train_df['year'] = train_df['timestamp'].apply(lambda x : int(x[0:4]))
#             train_df['month'] = train_df['timestamp'].apply(lambda x : int(x[5:7]))
#             train_df['day'] = train_df['timestamp'].apply(lambda x : int(x[8:10]))
            
#             #학습에 사용하지 않을 변수들을 제거합니다
#             train_df = train_df.drop(columns=['ID', 'supply(kg)'])
#             #submit_df = submit_df.drop(columns=['ID'])

#             #질적 변수들을 수치화합니다
#             qual_col = ['item', 'corporation', 'location']

#             for idx in qual_col:
#                 le = LabelEncoder()
#                 train_df[idx]=le.fit_transform(train_df[idx])
#                 #submit_df[i]=le.transform(submit_df[i]) #test 데이터에 대해서 fit하는 것은 data leakage에 해당합니다

#             train_df, test_df, _, _ = train_test_split(train_df, train_df, test_size=0.2, random_state=42)

#             ### 데이터 셋 생성 ###
#             window_size = 56
#             forcast_size= 28
#             batch_size = 32
#             targets = 'price(원/kg)'
#             date = 'timestamp'

#             train_df_fe, test_df_fe, mean_, std_ = standardization(train_df, test_df, 'timestamp', targets)
#             train_x, train_y, train_date = time_slide_df(train_df_fe, window_size, forcast_size, date, targets)
#             test_x, test_y, test_date = time_slide_df(test_df_fe, window_size, forcast_size, date, targets)

#             train_ds = Data(train_x[:1000], train_y[:1000])
#             valid_ds = Data(train_x[1000:], train_y[1000:])
#             test_ds = Data(test_x, test_y)

#             train_dl = DataLoader(train_ds, batch_size = batch_size, shuffle=True,)
#             valid_dl = DataLoader(valid_ds, batch_size = train_x[1000:].shape[0], shuffle=False)
#             test_dl  = DataLoader(test_ds,  batch_size = test_x.shape[0], shuffle=False)

#             ### 모델 학습 ###
#             train_loss_list = []
#             valid_loss_list = []
#             test_loss_list = []
#             epoch = 50
#             lr = 0.001
#             DLinear_model = LTSF_DLinear(
#                                         window_size=window_size,
#                                         forcast_size=forcast_size,
#                                         kernel_size=25,
#                                         individual=False,
#                                         feature_size=1,
#                                         )
#             criterion = torch.nn.MSELoss()
#             optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
#             max_loss = 999999999

#             for epoch in tqdm(range(1, epoch+1)):
#                 loss_list = []
#                 DLinear_model.train()
#                 for batch_idx, (data, target) in enumerate(train_dl):
#                     optimizer.zero_grad()
#                     output = DLinear_model(data)
#                     loss = criterion(output, target.unsqueeze(-1))
#                     loss.backward()
#                     optimizer.step()
#                     loss_list.append(loss.item())    
#                 train_loss_list.append(np.mean(loss_list))

#                 DLinear_model.eval()
#                 with torch.no_grad():
#                     for data, target in valid_dl:
#                         output = DLinear_model(data)
#                         valid_loss = criterion(output, target.unsqueeze(-1))
#                         valid_loss_list.append(valid_loss)
                    
#                     for data, target in test_dl:
#                         output = DLinear_model(data)
#                         test_loss = criterion(output, target.unsqueeze(-1))
#                         test_loss_list.append(test_loss)

#                 if valid_loss < max_loss:
#                     torch.save(DLinear_model, 'DLinear_model.pth')
#                     max_loss = valid_loss
#                     #print("valid_loss={:.3f}, test_los{:.3f}, Model Save".format(valid_loss, test_loss))
#                     dlinear_best_epoch = epoch
#                     dlinear_best_train_loss = np.mean(loss_list)
#                     dlinear_best_valid_loss = np.mean(valid_loss.item())
#                     dlinear_best_test_loss = np.mean(test_loss.item())

#                 #print("epoch = {}, train_loss : {:.3f}, valid_loss : {:.3f}, test_loss : {:.3f}".format(epoch, np.mean(loss_list), valid_loss, test_loss))

#             output_result['year'] = output_result['timestamp'].apply(lambda x : int(x[0:4]))
#             output_result['month'] = output_result['timestamp'].apply(lambda x : int(x[5:7]))
#             output_result['day'] = output_result['timestamp'].apply(lambda x : int(x[8:10]))
#             output_result = output_result.drop(columns=['ID', 'supply(kg)'])

#             qual_col = ['item', 'corporation', 'location']
#             for idx in qual_col:
#                 le = LabelEncoder()
#                 output_result[idx]=le.fit_transform(output_result[idx])

#             last_56_days_data = np.array(output_result[-window_size:]['price(원/kg)']).reshape(1, window_size, 1)
#             last_56_days_data = convert_standardization(last_56_days_data, mean_, std_)
#             last_56_days_data = torch.tensor(last_56_days_data, dtype=torch.float32)

#             DLinear_model.eval()
#             with torch.no_grad():
#                 predictions = DLinear_model(last_56_days_data)

#             predictions = reverse_standardization(predictions, mean_, std_)
#             predictions = predictions.reshape(28)
#             results.extend(np.array(predictions))

In [27]:
def min_max_standardization(train_df, test_df, not_col, target):
    train_df_ = train_df.copy()
    test_df_ = test_df.copy()
    col = [col for col in list(train_df.columns) if col not in [not_col]]
    
    min_list = []
    max_list = []
    
    for x in col:
        min_val = train_df_[x].min()
        max_val = train_df_[x].max()
        min_list.append(min_val)
        max_list.append(max_val)
        
        train_df_[x] = (train_df_[x] - min_val) / (max_val - min_val)
        test_df_[x] = (test_df_[x] - min_val) / (max_val - min_val)
    
    return train_df_, test_df_, min_list[col.index(target)], max_list[col.index(target)]

def min_max_scaling(data, min_val, max_val):
    """
    주어진 데이터를 Min-Max 스케일링합니다.

    Parameters:
    - data: 스케일링할 데이터
    - min_val: 데이터에서의 최솟값
    - max_val: 데이터에서의 최댓값

    Returns:
    - Min-Max 스케일링된 데이터
    """
    return (data - min_val) / (max_val - min_val)

def min_max_reverse_scaling(data, min_val, max_val):
    """
    Min-Max 스케일링된 데이터를 원래 값으로 복구합니다.

    Parameters:
    - data: Min-Max 스케일링된 데이터
    - min_val: 데이터에서의 최솟값
    - max_val: 데이터에서의 최댓값

    Returns:
    - 원래 값으로 복구된 데이터
    """
    return data * (max_val - min_val) + min_val


train_dataframe = pd.read_csv(DATA_PATH + TRAIN_SET)
results = []
lens = 0

for i in list(train_dataframe['item'].unique()):
    for k in train_dataframe[train_dataframe['item'] == i]['corporation'].unique():
        for j in ['J', 'S']:
            print(f'item : {i} , corporation : {k} , location : {j} ')
            train_df = train_dataframe[(train_dataframe['item'] == i) & \
                    (train_dataframe['location'] == j) & \
                    (train_dataframe['corporation'] == k)]
            output_result = train_df.copy()
            lens += len(train_df)
            if len(train_df) == 0:
                print('gd')
                continue

            train_df['year'] = train_df['timestamp'].apply(lambda x: int(x[0:4]))
            train_df['month'] = train_df['timestamp'].apply(lambda x: int(x[5:7]))
            train_df['day'] = train_df['timestamp'].apply(lambda x: int(x[8:10]))

            # 학습에 사용하지 않을 변수들을 제거합니다
            train_df = train_df.drop(columns=['ID', 'supply(kg)'])
            # submit_df = submit_df.drop(columns=['ID'])

            # 질적 변수들을 수치화합니다
            qual_col = ['item', 'corporation', 'location']

            for idx in qual_col:
                le = LabelEncoder()
                train_df[idx] = le.fit_transform(train_df[idx])
                # submit_df[i] = le.transform(submit_df[i]) # test 데이터에 대해서 fit하는 것은 data leakage에 해당합니다

            train_df, test_df, _, _ = train_test_split(train_df, train_df, test_size=0.2, random_state=42)

            ### 데이터 셋 생성 ###
            window_size = 28*1
            forcast_size = 28
            batch_size = 32
            targets = 'price(원/kg)'
            date = 'timestamp'

            train_df_fe, test_df_fe, min_, max_ = min_max_standardization(train_df, test_df, 'timestamp', targets)
            train_x, train_y, train_date = time_slide_df(train_df_fe, window_size, forcast_size, date, targets)
            test_x, test_y, test_date = time_slide_df(test_df_fe, window_size, forcast_size, date, targets)

            train_ds = Data(train_x[:1000], train_y[:1000])
            valid_ds = Data(train_x[1000:], train_y[1000:])
            test_ds = Data(test_x, test_y)

            train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=False)
            valid_dl = DataLoader(valid_ds, batch_size=train_x[1000:].shape[0], shuffle=False)
            test_dl = DataLoader(test_ds, batch_size=test_x.shape[0], shuffle=False)

            ### 모델 학습 ###
            train_loss_list = []
            valid_loss_list = []
            test_loss_list = []
            epoch = 100
            lr = 0.0005
            DLinear_model = LTSF_DLinear(
                window_size=window_size,
                forcast_size=forcast_size,
                kernel_size=25,
                individual=False,
                feature_size=1
            )
            criterion = torch.nn.MSELoss()
            optimizer = torch.optim.Adam(DLinear_model.parameters(), lr=lr)
            max_loss = 999999999

            for epoch in tqdm(range(1, epoch + 1)):
                loss_list = []
                DLinear_model.train()
                for batch_idx, (data, target) in enumerate(train_dl):
                    optimizer.zero_grad()
                    output = DLinear_model(data)
                    loss = criterion(output, target.unsqueeze(-1))
                    loss.backward()
                    optimizer.step()
                    loss_list.append(loss.item())
                train_loss_list.append(np.mean(loss_list))

                DLinear_model.eval()
                with torch.no_grad():
                    for data, target in valid_dl:
                        output = DLinear_model(data)
                        valid_loss = criterion(output, target.unsqueeze(-1))
                        valid_loss_list.append(valid_loss)

                    for data, target in test_dl:
                        output = DLinear_model(data)
                        test_loss = criterion(output, target.unsqueeze(-1))
                        test_loss_list.append(test_loss)

                if valid_loss < max_loss:
                    torch.save(DLinear_model, 'DLinear_model.pth')
                    max_loss = valid_loss
                    # print("valid_loss={:.3f}, test_los{:.3f}, Model Save".format(valid_loss, test_loss))
                    dlinear_best_epoch = epoch
                    dlinear_best_train_loss = np.mean(loss_list)
                    dlinear_best_valid_loss = np.mean(valid_loss.item())
                    dlinear_best_test_loss = np.mean(test_loss.item())

                # print("epoch = {}, train_loss : {:.3f}, valid_loss : {:.3f}, test_loss : {:.3f}".format(epoch, np.mean(loss_list), valid_loss, test_loss))
            
            DLinear_model = torch.load('DLinear_model.pth')
            output_result['year'] = output_result['timestamp'].apply(lambda x: int(x[0:4]))
            output_result['month'] = output_result['timestamp'].apply(lambda x: int(x[5:7]))
            output_result['day'] = output_result['timestamp'].apply(lambda x: int(x[8:10]))
            output_result = output_result.drop(columns=['ID', 'supply(kg)'])

            qual_col = ['item', 'corporation', 'location']
            for idx in qual_col:
                le = LabelEncoder()
                output_result[idx] = le.fit_transform(output_result[idx])

            last_56_days_data = np.array(output_result[-window_size:]['price(원/kg)']).reshape(1, window_size, 1)
            last_56_days_data = min_max_scaling(last_56_days_data, min_, max_)
            last_56_days_data = torch.tensor(last_56_days_data, dtype=torch.float32)

            DLinear_model.eval()
            with torch.no_grad():
                predictions = DLinear_model(last_56_days_data)

            predictions = min_max_reverse_scaling(predictions, min_, max_)
            predictions = predictions.reshape(28)
            results.extend(np.array(predictions))


item : TG , corporation : A , location : J 


100%|██████████| 100/100 [00:01<00:00, 50.84it/s]


item : TG , corporation : A , location : S 


100%|██████████| 100/100 [00:01<00:00, 51.59it/s]


item : TG , corporation : B , location : J 


100%|██████████| 100/100 [00:01<00:00, 50.92it/s]


item : TG , corporation : B , location : S 


100%|██████████| 100/100 [00:01<00:00, 50.94it/s]


item : TG , corporation : C , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.36it/s]


item : TG , corporation : C , location : S 


100%|██████████| 100/100 [00:01<00:00, 50.05it/s]


item : TG , corporation : D , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.45it/s]


item : TG , corporation : D , location : S 


100%|██████████| 100/100 [00:01<00:00, 52.45it/s]


item : TG , corporation : E , location : J 


100%|██████████| 100/100 [00:01<00:00, 52.11it/s]


item : TG , corporation : E , location : S 


100%|██████████| 100/100 [00:01<00:00, 52.78it/s]


item : CR , corporation : A , location : J 


100%|██████████| 100/100 [00:01<00:00, 50.24it/s]


item : CR , corporation : A , location : S 
gd
item : CR , corporation : B , location : J 


100%|██████████| 100/100 [00:02<00:00, 47.38it/s]


item : CR , corporation : B , location : S 
gd
item : CR , corporation : C , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.70it/s]


item : CR , corporation : C , location : S 
gd
item : CR , corporation : D , location : J 


100%|██████████| 100/100 [00:01<00:00, 50.91it/s]


item : CR , corporation : D , location : S 


100%|██████████| 100/100 [00:01<00:00, 52.36it/s]


item : CR , corporation : E , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.20it/s]


item : CR , corporation : E , location : S 


100%|██████████| 100/100 [00:01<00:00, 51.71it/s]


item : CB , corporation : A , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.32it/s]


item : CB , corporation : A , location : S 


100%|██████████| 100/100 [00:01<00:00, 52.35it/s]


item : CB , corporation : D , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.78it/s]


item : CB , corporation : D , location : S 
gd
item : CB , corporation : E , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.23it/s]


item : CB , corporation : E , location : S 
gd
item : CB , corporation : F , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.36it/s]


item : CB , corporation : F , location : S 
gd
item : RD , corporation : A , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.46it/s]


item : RD , corporation : A , location : S 


100%|██████████| 100/100 [00:01<00:00, 51.37it/s]


item : RD , corporation : C , location : J 
gd
item : RD , corporation : C , location : S 


100%|██████████| 100/100 [00:01<00:00, 52.80it/s]


item : RD , corporation : D , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.30it/s]


item : RD , corporation : D , location : S 


100%|██████████| 100/100 [00:01<00:00, 51.24it/s]


item : RD , corporation : E , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.43it/s]


item : RD , corporation : E , location : S 


100%|██████████| 100/100 [00:01<00:00, 51.27it/s]


item : RD , corporation : F , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.38it/s]


item : RD , corporation : F , location : S 
gd
item : BC , corporation : A , location : J 


100%|██████████| 100/100 [00:01<00:00, 50.79it/s]


item : BC , corporation : A , location : S 


100%|██████████| 100/100 [00:01<00:00, 50.90it/s]


item : BC , corporation : B , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.35it/s]


item : BC , corporation : B , location : S 


100%|██████████| 100/100 [00:01<00:00, 52.61it/s]


item : BC , corporation : C , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.35it/s]


item : BC , corporation : C , location : S 


100%|██████████| 100/100 [00:01<00:00, 52.51it/s]


item : BC , corporation : D , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.06it/s]


item : BC , corporation : D , location : S 
gd
item : BC , corporation : E , location : J 


100%|██████████| 100/100 [00:01<00:00, 51.22it/s]


item : BC , corporation : E , location : S 


100%|██████████| 100/100 [00:01<00:00, 51.17it/s]


### Submission

In [28]:
results = pd.DataFrame(results, columns = ['gd'])

for i in range(len(results)):
    if (i-1) % 7 == 0: # 1, 8, 15, 22
        results.iloc[i,-1] = 0


submission = pd.read_csv(DATA_PATH+'./sample_submission.csv')
submission['answer'] = results

submission.to_csv(SUBMISSION_PATH+'./d_linear_submission.csv', index=False)

In [29]:
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,2461.746826
1,TG_A_J_20230305,0.000000
2,TG_A_J_20230306,2372.402832
3,TG_A_J_20230307,2234.895264
4,TG_A_J_20230308,2584.550049
...,...,...
1087,RD_F_J_20230327,1467.568481
1088,RD_F_J_20230328,1285.734131
1089,RD_F_J_20230329,954.388123
1090,RD_F_J_20230330,948.465088


In [25]:
# 56
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,2593.700195
1,TG_A_J_20230305,0.000000
2,TG_A_J_20230306,2301.097656
3,TG_A_J_20230307,2439.053467
4,TG_A_J_20230308,2645.454102
...,...,...
1087,RD_F_J_20230327,1324.171509
1088,RD_F_J_20230328,1017.770020
1089,RD_F_J_20230329,397.086548
1090,RD_F_J_20230330,691.312317


In [26]:
# 140
submission

Unnamed: 0,ID,answer
0,TG_A_J_20230304,1603.417358
1,TG_A_J_20230305,0.000000
2,TG_A_J_20230306,2084.053955
3,TG_A_J_20230307,1671.266235
4,TG_A_J_20230308,2183.236816
...,...,...
1087,RD_F_J_20230327,1087.108032
1088,RD_F_J_20230328,1199.210083
1089,RD_F_J_20230329,1169.479614
1090,RD_F_J_20230330,1140.815796
