In [None]:
import os
import random
import math
import wandb
import time

from glob import glob
from copy import deepcopy
from tqdm.auto import tqdm
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader
from torch.nn.modules.loss import _Loss
from torch.optim.lr_scheduler import _LRScheduler

from sklearn.model_selection import StratifiedShuffleSplit, KFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler

device = torch.device("cuda" if torch.cuda.is_available()  else "cpu")

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU plus all CUDA
    devices), then puts cuDNN into deterministic mode with autotuning off.

    Args:
        seed: Integer seed handed to each generator.
    """
    # Host-side generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current GPU, and every GPU (multi-GPU setups).
    for seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

    # Trade cuDNN speed for run-to-run reproducibility.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# LSTM approach

## Preprocessing

In [None]:
class MyDataset(Dataset):
    """Dataset over ``(source, target)`` pairs as produced by ``split_data``.

    Each element of ``data`` is ``([last_2week, last_week], target)``; the two
    source weeks are stacked into one tensor with a leading axis of length 2.
    """

    def __init__(self, data):
        super().__init__()
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(src, trg)`` float tensors for sample ``idx``."""
        (last_2week, last_week), target = self.data[idx]
        # Stack the two weeks along a new first axis -> (2, week_length).
        stacked = np.stack((last_2week, last_week), axis=0)
        return torch.Tensor(stacked), torch.Tensor(target)

In [None]:
def split_data(data, number):
    """Cut a 2-D array into weekly (two-week source, one-week target) samples.

    Rows are read in 168-row (7 x 24) windows starting 120 rows (5 x 24) into
    ``data``; only column ``number + 2`` is used. Window indices start at 4.
    The first 80% of the samples (in order) become the training set, the rest
    the validation set.

    Args:
        data: 2-D array indexable as ``data[rows, col]``.
        number: Zero-based road index; the value column is ``number + 2``.

    Returns:
        ``(train_data, valid_data)``, each a list of
        ``([last_2week, last_week], target)`` tuples.
    """
    week = 24 * 7
    offset = 5 * 24
    column = number + 2

    def week_slice(k):
        # k-th weekly window after the fixed row offset.
        start = offset + k * week
        return data[start:start + week, column]

    source, target = [], []
    for i in range(4, len(data) // week - 2):
        source.append([week_slice(i), week_slice(i + 1)])
        target.append(week_slice(i + 2))
    print(len(source), len(target))

    cut = int(len(source) * 0.8)
    train_data = list(zip(source[:cut], target[:cut]))
    valid_data = list(zip(source[cut:], target[cut:]))
    return train_data, valid_data

In [None]:
def preprocess_dataframe(df):
    """Rebuild the frame with synthetic rows inserted for two partially recorded days.

    The function works on hard-coded row positions (1416, 1429, 2125, 2127) and
    on the dates 20200222/20200229/20200307 and 20200323/20200330/20200406 in
    the '날짜' column, so it is only valid for the exact train+validate layout
    it was written for.

    For each gap, the missing hours are estimated as the mean of the same
    hours one week earlier and one week later, multiplied by a ratio
    (``division``) computed from the hours that were recorded.

    Args:
        df: DataFrame whose first two columns are date/hour and whose remaining
            columns are the value columns.

    Returns:
        A new DataFrame (fresh 0..n-1 index) = head + patched first gap +
        middle + patched second gap + tail.
    """
    df = df.copy()
    head = df.iloc[:1416,:]
    # 13 rows that exist for the first incomplete day.
    # NOTE(review): variables are named "0222" but the ratio below is taken from
    # rows dated 20200229 -- presumably the incomplete day is 2020-02-29; confirm.
    tmp_0222 = df.iloc[1416:1429,:]
    # Ratio of the recorded first 13 rows of 20200229 to the average of the
    # first 13 rows of the neighbouring weeks (20200222 and 20200307).
    division = df[df['날짜']==20200229].iloc[:13,2:].sum().mean() / \
        ((df[df['날짜']==20200222].iloc[:13,2:].sum().mean() + df[df['날짜']==20200307].iloc[:13,2:].sum().mean())/2)
    # Estimated values for the remaining rows (position 13 onward of each day).
    tmp_0222_ = (df[df['날짜']==20200222].iloc[13:,2:].reset_index(drop=True) + \
                 df[df['날짜']==20200307].iloc[13:,2:].reset_index(drop=True))/2*division
    # NOTE(review): the date/hour columns of the synthetic rows are copied from
    # the FIRST 11 recorded rows, so their hour values repeat the early hours
    # rather than continuing after the recorded ones -- confirm this is intended.
    tmp_0222__ = pd.concat((tmp_0222.iloc[:11,:2].reset_index(drop=True), tmp_0222_),axis=1)
    for_0222 = pd.concat((tmp_0222, tmp_0222__), axis=0)
    middle = df.iloc[1429:2125,:]
    # Second gap: only 2 rows (2125-2126) exist. Their values are doubled, and
    # the date/hour columns are taken from the first two rows of the whole frame.
    # NOTE(review): the reason for the "* 2" factor is not visible here -- confirm.
    tmp_0330 = pd.concat((df.iloc[:2,:2].reset_index(drop=True), df.iloc[2125:2127].iloc[:2,2:].reset_index(drop=True) * 2), axis=1)
    # Ratio of those (doubled) 2 rows to the average of the first 2 rows of the
    # neighbouring weeks (20200406 and 20200323).
    division = tmp_0330.iloc[:2,2:].sum().mean() / \
        ((df[df['날짜']==20200406].iloc[:2,2:].reset_index(drop=True).sum().mean()+df[df['날짜']==20200323].iloc[:2,2:].reset_index(drop=True).sum().mean())/2)
    # Estimated values for rows 2 onward; only 2 date/hour rows are supplied, so
    # the column-wise concat leaves the date/hour of the other rows as NaN.
    tmp_0330_ = pd.concat((df.iloc[2125:2127,:2].reset_index(drop=True), ((df[df['날짜']==20200406].iloc[2:,2:].reset_index(drop=True)+ \
                df[df['날짜']==20200323].iloc[2:,2:].reset_index(drop=True))/2)*division), axis=1)
    # Overwrite the date of every synthetic row (the hour column is not filled in).
    tmp_0330_['날짜'] = 20200330.0
    for_0330 = pd.concat((tmp_0330.reset_index(drop=True), tmp_0330_.reset_index(drop=True)), axis=0)
    tail = df.iloc[2127:,:]
    pre_df = pd.concat((head, for_0222, middle, for_0330, tail), axis=0).reset_index(drop=True)
    return pre_df

## Model

In [None]:
class Swish(nn.Module):
    """Swish activation: multiplies the input element-wise by its sigmoid."""

    def __init__(self):
        super().__init__()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x * sigmoid(x)``."""
        gate = self.sigmoid(x)
        return gate * x

In [None]:
class MyModel(nn.Module):
    """Two stacked LSTM blocks followed by a linear head.

    ``forward`` expects batch-first input with (at least) two time steps: the
    activated outputs of ``lstm2`` at steps 0 and 1 are concatenated
    (``hidden_size * 2`` features) and projected to ``output_size``.

    Args:
        input_size: Features per time step (168 = 7 days x 24 hours).
        hidden_size: LSTM hidden width.
        output_size: Length of the predicted vector (168 = 7 days x 24 hours).
        num_layers: Number of layers inside each of the two LSTM blocks.
        dropout: Dropout between LSTM layers.
        batch_first: Accepted for backward compatibility but ignored -- both
            LSTMs are always built with ``batch_first=True`` because
            ``forward`` indexes the output as ``(batch, step, hidden)``.
    """

    def __init__(self,
                 input_size=168,
                 hidden_size=1024,
                 output_size=168,
                 num_layers=2,
                 dropout=0,
                 batch_first=True):
        super(MyModel, self).__init__()

        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTM(input_size,
                             hidden_size,
                             dropout=dropout,
                             num_layers=num_layers,
                             batch_first=True)
        self.lstm2 = nn.LSTM(hidden_size,
                             hidden_size,
                             dropout=dropout,
                             num_layers=num_layers,
                             batch_first=True)
        self.linear = nn.Linear(hidden_size*2,
                                output_size)
        self.activation = Swish()

    def forward(self, x, h_in, c_in):
        """Run both LSTM blocks and the linear head.

        Args:
            x: Input of shape ``(batch, 2, input_size)``.
            h_in: Initial hidden state, ``(num_layers, batch, hidden_size)``.
            c_in: Initial cell state, same shape as ``h_in``.

        Returns:
            ``(out, (h_1, c_1))`` where ``out`` is ``(batch, output_size)`` and
            ``(h_1, c_1)`` are the final states of the FIRST LSTM block.
        """
        # The states used to be re-wrapped in a brand-new nn.Parameter on every
        # call. Such a parameter is never registered on the module nor seen by
        # the optimizer, so it was not learnable; it only detached the states
        # from the caller's tensors. A plain float cast gives the same outputs.
        h_in = h_in.float()
        c_in = c_in.float()

        # Block 1
        lstm_out, (h_1, c_1) = self.lstm1(x, (h_in, c_in))
        lstm_out = self.activation(lstm_out)

        # Block 2, initialised with the final states of block 1.
        lstm_out, (h_2, c_2) = self.lstm2(lstm_out, (h_1, c_1))
        lstm_out = self.activation(lstm_out)  # (batch, 2, hidden)

        # Concatenate both time steps: (batch, 2, hidden) -> (batch, 2 * hidden).
        lstm_out_ = torch.cat((lstm_out[:,0,:], lstm_out[:,1,:]), dim=-1)
        out = self.linear(lstm_out_)

        return out, (h_1, c_1)

## Initialization

In [None]:
def init_weight(model, kind='xavier'):
    """Re-initialise the parameters of ``model`` in place.

    Only parameters with at least two dimensions are touched, so ordinary 1-D
    bias vectors keep their existing values. Parameters whose name contains
    ``'weight'`` get the normal variant of the scheme; otherwise those whose
    name contains ``'bias'`` get the uniform variant.

    Args:
        model: Module whose ``named_parameters()`` are visited.
        kind: ``'xavier'`` or ``'kaiming'``; any other value changes nothing.
    """
    schemes = {
        'xavier': (lambda t: init.xavier_normal_(t, gain=1.0),
                   lambda t: init.xavier_uniform_(t, gain=1.0)),
        'kaiming': (init.kaiming_normal_, init.kaiming_uniform_),
    }
    if kind not in schemes:
        return
    weight_init, bias_init = schemes[kind]

    for name, param in model.named_parameters():
        if param.dim() < 2:
            # Fan-in/fan-out are undefined for vectors and scalars.
            continue
        if 'weight' in name:
            weight_init(param)
        elif 'bias' in name:
            bias_init(param)

## Loss

In [None]:
class MyLoss(_Loss):
    """Root-mean-squared-error loss: the square root of ``nn.MSELoss``."""

    def __init__(self):
        super(MyLoss, self).__init__()
        self.lossMSE = nn.MSELoss()

    def forward(self, preds, trg):
        """Return the scalar RMSE between ``preds`` and ``trg``."""
        mean_squared = self.lossMSE(preds, trg)
        return mean_squared.sqrt()

## Metric

In [None]:
def mse_error(preds, trg):
    """Return the mean of the squared element-wise difference of the inputs."""
    diff = preds - trg
    return (diff * diff).mean()

## Train Functions

In [None]:
def train_one_epoch(model, criterion, train_loader, optimizer, scheduler, device, mini, maxi):
    """Train ``model`` for one pass over ``train_loader``.

    The scheduler is stepped once per batch.

    Args:
        model: Module exposing ``lstm1.num_layers``, ``hidden_size`` and a
            ``forward(src, h_0, c_0)`` returning ``(outputs, states)``.
        criterion: Loss applied to the (scaled) outputs and targets.
        train_loader: Iterable of ``(src, trg)`` batches.
        optimizer: Optimizer stepped after every batch.
        scheduler: Object with a ``step()`` method, stepped after every batch.
        device: Device the batches and initial states are placed on.
        mini, maxi: Minimum / maximum used to undo min-max scaling before the
            MSE metric is computed.

    Returns:
        ``(losses, total_mse)``: the sum of the per-batch criterion values
        (divide by the number of batches) and the sum over batches of
        ``batch MSE * batch size`` in original units (divide by the number of
        samples to obtain the epoch MSE).
    """
    model.train()

    losses = 0
    total_mse = 0
    for src, trg in tqdm(train_loader):
        src, trg = src.to(device).float(), trg.to(device).float()
        batch_size = src.size(0)

        # Zero initial hidden / cell states for this batch.
        state_shape = (model.lstm1.num_layers, batch_size, model.hidden_size)
        h_0 = torch.zeros(state_shape, device=device)
        c_0 = torch.zeros(state_shape, device=device)

        outs, _ = model(src, h_0, c_0)

        loss = criterion(outs, trg)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        losses += loss.item()

        # Undo min-max scaling so the metric is in original units.
        outs_np = outs.detach().cpu().numpy() * (maxi - mini) + mini
        trg_np = trg.detach().cpu().numpy() * (maxi - mini) + mini

        # Weight the batch mean by the batch size: the caller divides the total
        # by the number of samples, and the last batch may be smaller.
        total_mse += mse_error(outs_np, trg_np) * batch_size

    return losses, total_mse
        
def valid_one_epoch(model, criterion, valid_loader, device, mini, maxi):
    """Evaluate ``model`` on ``valid_loader`` without gradient tracking.

    Args:
        model: Module exposing ``lstm1.num_layers``, ``hidden_size`` and a
            ``forward(src, h_0, c_0)`` returning ``(outputs, states)``.
        criterion: Loss applied to the (scaled) outputs and targets.
        valid_loader: Iterable of ``(src, trg)`` batches.
        device: Device the batches and initial states are placed on.
        mini, maxi: Minimum / maximum used to undo min-max scaling before the
            MSE metric is computed.

    Returns:
        ``(losses, total_mse)``: the sum of the per-batch criterion values
        (divide by the number of batches) and the sum over batches of
        ``batch MSE * batch size`` in original units (divide by the number of
        samples to obtain the epoch MSE).
    """
    model.eval()
    print("validation")

    losses = 0
    total_mse = 0
    with torch.no_grad():
        for src, trg in tqdm(valid_loader):
            src, trg = src.to(device).float(), trg.to(device).float()
            batch_size = src.size(0)

            # Zero initial hidden / cell states for this batch.
            state_shape = (model.lstm1.num_layers, batch_size, model.hidden_size)
            h_0 = torch.zeros(state_shape, device=device)
            c_0 = torch.zeros(state_shape, device=device)

            outs, _ = model(src, h_0, c_0)

            loss = criterion(outs, trg)
            losses += loss.item()

            # Undo min-max scaling so the metric is in original units.
            outs_np = outs.detach().cpu().numpy() * (maxi - mini) + mini
            trg_np = trg.detach().cpu().numpy() * (maxi - mini) + mini

            # Weight the batch mean by the batch size: the caller divides the
            # total by the number of samples, and the last batch may be smaller.
            total_mse += mse_error(outs_np, trg_np) * batch_size

    return losses, total_mse

## Early Stop

In [None]:
class EarlyStopper():
    """Track the best validation score and signal when to stop or checkpoint.

    Attributes:
        patience: Number of consecutive non-improving checks tolerated.
        patience_counter: Current count of consecutive non-improving checks.
        best_mse: Lowest score seen so far (``inf`` before the first check).
        stop: Set to True once ``patience_counter`` reaches ``patience``.
        save_model: True when the most recent check improved on ``best_mse``.
    """

    def __init__(self, patience: int)-> None:
        self.patience = patience

        self.patience_counter = 0
        # inf instead of a magic 1e9 sentinel, so any finite first score counts
        # as an improvement regardless of its magnitude.
        self.best_mse = float('inf')
        self.stop = False
        self.save_model = False

    def check_early_stopping(self, score: float)-> None:
        """Update the state with a new validation score (lower is better).

        The very first score is treated as an improvement, so ``save_model`` is
        set and a checkpoint exists even if later epochs never improve on it.
        A NaN score compares as "not better" and consumes patience.
        """
        if score < self.best_mse:
            self.patience_counter = 0
            self.save_model = True
            self.best_mse = score
        else:
            self.patience_counter += 1
            self.save_model = False
            # >= guards against the counter skipping past the threshold.
            if self.patience_counter >= self.patience:
                self.stop = True
        print("best_mse", self.best_mse, "best_rmse", np.sqrt(self.best_mse))

## Scheduler

In [None]:
class CosineAnnealingWarmupRestarts(_LRScheduler):
    """Cosine-annealing learning-rate schedule with linear warmup and restarts.

    Within a cycle the learning rate rises linearly from ``min_lr`` to the
    cycle's ``max_lr`` over ``warmup_steps`` steps, then follows a half cosine
    back down to ``min_lr``. After each cycle the peak is multiplied by
    ``gamma`` and the post-warmup cycle length by ``cycle_mult``.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        first_cycle_steps (int): First cycle step size.
        cycle_mult(float): Cycle steps magnification. Default: 1.
        max_lr(float): First cycle's max learning rate. Default: 0.1.
        min_lr(float): Min learning rate. Default: 0.001.
        warmup_steps(int): Linear warmup step size. Default: 0.
        gamma(float): Decrease rate of max learning rate by cycle. Default: 1.
        last_epoch (int): The index of last epoch. Default: -1.
    """

    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 first_cycle_steps: int,
                 cycle_mult: float = 1.,
                 max_lr: float = 0.1,
                 min_lr: float = 0.001,
                 warmup_steps: int = 0,
                 gamma: float = 1.,
                 last_epoch: int = -1
                 ):
        # Warmup must be strictly shorter than the first cycle.
        assert warmup_steps < first_cycle_steps

        self.first_cycle_steps = first_cycle_steps  # first cycle step size
        self.cycle_mult = cycle_mult  # cycle steps magnification
        self.base_max_lr = max_lr  # first max learning rate
        self.max_lr = max_lr  # max learning rate in the current cycle
        self.min_lr = min_lr  # min learning rate
        self.warmup_steps = warmup_steps  # warmup step size
        self.gamma = gamma  # decrease rate of max learning rate by cycle

        self.cur_cycle_steps = first_cycle_steps  # first cycle step size
        self.cycle = 0  # cycle count
        self.step_in_cycle = last_epoch  # step size of the current cycle

        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)

        # Overwrite whatever lr the optimizer was created with by min_lr.
        self.init_lr()

    def init_lr(self):
        """Set every param group's lr (and ``base_lrs``) to ``min_lr``."""
        self.base_lrs = []
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.min_lr
            self.base_lrs.append(self.min_lr)

    def get_lr(self):
        """Return the learning rate of each param group for the current step."""
        if self.step_in_cycle == -1:
            return self.base_lrs
        elif self.step_in_cycle < self.warmup_steps:
            # Linear warmup from base_lr towards max_lr.
            return [(self.max_lr - base_lr) * self.step_in_cycle / self.warmup_steps + base_lr for base_lr in
                    self.base_lrs]
        else:
            # Half-cosine decay from max_lr back to base_lr over the rest of the cycle.
            return [base_lr + (self.max_lr - base_lr) \
                    * (1 + math.cos(math.pi * (self.step_in_cycle - self.warmup_steps) \
                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule by one step, or jump to ``epoch`` if given."""
        if epoch is None:
            epoch = self.last_epoch + 1
            self.step_in_cycle = self.step_in_cycle + 1
            if self.step_in_cycle >= self.cur_cycle_steps:
                # Cycle finished: start the next one and rescale its length
                # (the warmup part keeps its size).
                self.cycle += 1
                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
                self.cur_cycle_steps = int(
                    (self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
        else:
            if epoch >= self.first_cycle_steps:
                if self.cycle_mult == 1.:
                    # Equal-length cycles: plain modulo arithmetic.
                    self.step_in_cycle = epoch % self.first_cycle_steps
                    self.cycle = epoch // self.first_cycle_steps
                else:
                    # Cycle lengths form a geometric series; solve for the cycle index n.
                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
                    self.cycle = n
                    self.step_in_cycle = epoch - int(
                        self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
            else:
                self.cur_cycle_steps = self.first_cycle_steps
                self.step_in_cycle = epoch

        # Peak lr decays by gamma for every completed cycle.
        self.max_lr = self.base_max_lr * (self.gamma ** self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

## Train

In [None]:
!wandb login

In [None]:
def setting(number):
    """Train, log and checkpoint one LSTM model for a single road column.

    Reads train/validate CSVs under ``root_dir``, patches and min-max scales
    the data, trains ``MyModel`` with a warmup + cosine schedule, logs metrics
    to Weights & Biases and keeps the checkpoint with the best validation MSE,
    finally renaming it to include the best RMSE and the experiment name.

    Args:
        number: Zero-based road index (value column ``number + 2``).
    """
    CFG = {
        'model_name':'lstm',
        'seed':2022,
        'batch_size': 4,
        'epoch': 100,
        'initialization': "kaiming", # kaiming, xavier
        'input_size':168,
        'hidden_size':1024,
        'output_size':168,
        'num_layers':2,
        'road_number':number,
        'save_dir':'lstm_2week_minmax_swish',
        'dropout':0.
    }

    root_dir = '/content/drive/Othercomputers/내 컴퓨터/workspace/ml_cartraffic'

    experiment_name = 'lstm_minmax_2layer'
    # Pass the config to init(); assigning wandb.config before a run exists
    # does not attach it to the run.
    run = wandb.init(project=f"{CFG['model_name']}",
                     settings=wandb.Settings(start_method="thread"),
                     name=f"{experiment_name}_{CFG['road_number']}",
                     config=CFG)

    seed_everything(CFG['seed'])
    # Device setup
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Data setup: drop the last week of train (24*7 rows) before appending validate.
    df_train = pd.read_csv(root_dir + '/data/train.csv')
    df_valid = pd.read_csv(root_dir + '/data/validate.csv')
    total_df = pd.concat((df_train.iloc[:-24*7,:], df_valid.iloc[:,:]), axis=0)
    print(total_df)

    pre_df = preprocess_dataframe(total_df)
    # Min / max of this road's column, used to report metrics in original units.
    mini, maxi = pre_df.iloc[:,CFG['road_number']+2].min(), \
                pre_df.iloc[:,CFG['road_number']+2].max()
    print('min, max', mini, maxi)

    scaler = MinMaxScaler()
    scaler.fit(pre_df)
    pre_df = scaler.transform(pre_df)

    train_data, valid_data = split_data(pre_df, CFG['road_number'])

    train_dataset = MyDataset(train_data)
    valid_dataset = MyDataset(valid_data)

    train_loader = DataLoader(
        train_dataset,
        batch_size=CFG['batch_size'],
        num_workers=0,
        shuffle=True,
        pin_memory=use_cuda,
        drop_last=False,)

    valid_loader = DataLoader(
        valid_dataset,
        batch_size=CFG['batch_size'],
        num_workers=0,
        shuffle=False,
        pin_memory=use_cuda,
        drop_last=False)

    # Model, loss, scheduler, optimizer
    model = MyModel(input_size=CFG['input_size'],
                    hidden_size=CFG['hidden_size'],
                    output_size=CFG['output_size'],
                    num_layers=CFG['num_layers']).to(device)

    init_weight(model, kind=CFG['initialization'])

    criterion = MyLoss().to(device)

    # The scheduler is stepped once per batch, so cycle lengths must count the
    # batches the loader really yields. drop_last=False keeps the partial last
    # batch, which len(dataset) // batch_size would miss.
    steps_per_epoch = len(train_loader)
    cosine_annealing_scheduler_arg = dict(
        first_cycle_steps=steps_per_epoch * CFG['epoch'],
        cycle_mult=1.0,
        max_lr=1e-04,
        min_lr=1e-07,
        warmup_steps=steps_per_epoch * 3,
        gamma=0.9
    )

    # lr here is a placeholder; the scheduler overwrites it with min_lr.
    optimizer = optim.Adam(model.parameters(), lr=0.000, weight_decay=0)
    scheduler = CosineAnnealingWarmupRestarts(optimizer, **cosine_annealing_scheduler_arg)

    early_stopper = EarlyStopper(patience=100)
    save_dir = root_dir + f"/{CFG['save_dir']}"
    os.makedirs(save_dir, exist_ok=True)
    best_path = save_dir + f"/{CFG['road_number']:02d}_best.pth"

    print('Start Training!')
    for i in range(CFG['epoch']):
        print(f"Epoch :", i)
        train_losses, train_mse = train_one_epoch(model, criterion, train_loader, optimizer, scheduler, device, mini, maxi)
        print("Train loss, score:", train_losses / len(train_loader), np.sqrt(train_mse / len(train_loader.dataset)))
        valid_losses, valid_mse = valid_one_epoch(model, criterion, valid_loader, device, mini, maxi)
        print("Valid loss, score:", valid_losses / len(valid_loader), np.sqrt(valid_mse / len(valid_loader.dataset)))
        early_stopper.check_early_stopping(valid_mse / len(valid_loader.dataset))

        wandb_dict = {
            'train loss': train_losses / len(train_loader),
            'train score': np.sqrt(train_mse / len(train_loader.dataset)),
            'valid loss': valid_losses / len(valid_loader),
            'valid score': np.sqrt(valid_mse / len(valid_loader.dataset)),
            'learning rate': scheduler.get_lr()[0]
        }

        wandb.log(wandb_dict)

        print("learning rate :", scheduler.get_lr())

        if early_stopper.save_model == True:
            dic = {
                'model':model.state_dict(),
                'optimizer':optimizer.state_dict(),
                'scheduler':scheduler.state_dict(),
            }
            torch.save(dic, best_path)
            time.sleep(1)
            print('save_model')

        if early_stopper.stop:
            break

    # Only rename when a checkpoint was actually written; otherwise os.rename
    # would raise FileNotFoundError and abort the loop over all roads.
    if os.path.exists(best_path):
        os.rename(
            best_path,
            save_dir + f"/{CFG['road_number']:02d}_{np.sqrt(early_stopper.best_mse):.2f}_{experiment_name}.pth"
            )
    else:
        print('no checkpoint was saved for road', CFG['road_number'])

    # Close this run so the next call starts a fresh one.
    run.finish()

In [None]:
# Train one model per road column (35 columns in total).
for road_number in range(35):
    setting(road_number)

## Inference

In [None]:
class MyTestDataset(Dataset):
    """Single-sample dataset for inference on one road column.

    Regardless of ``idx`` the sample is built from the first three 168-row
    weeks of ``data``: weeks 1 and 2 form the source, week 3 the target.
    """

    def __init__(self, data, road_number):
        super().__init__()
        self.data = data
        self.road_number = road_number

    def __len__(self):
        return 1

    def __getitem__(self, idx):
        """Return ``(src, trg)`` with ``src`` stacked as (2, 168)."""
        week = 7 * 24
        first, second, target = (
            self.data[k * week:(k + 1) * week, self.road_number] for k in range(3)
        )
        stacked = np.stack((first, second), axis=0)
        return torch.Tensor(stacked), torch.Tensor(target)

In [None]:
def infer_setting(number, pths):
    """Load the checkpoint for one road and predict the test week.

    Args:
        number: Zero-based road index; also the index into ``pths``.
        pths: Sequence of checkpoint paths, one per road, ordered by road index.

    Returns:
        ``(preds, scaler)``: a list with one prediction tensor per test batch
        (scaled units) and the ``MinMaxScaler`` fitted on the value columns of
        the preprocessed train+validate frame.
    """
    CFG = {
        'seed':2022,
        'batch_size': 4,
        'epoch': 100,
        'initialization': "kaiming", # kaiming, xavier
        'input_size':168,
        'hidden_size':1024,
        'output_size':168,
        'num_layers':2,
        'road_number':number
    }
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    root_dir = '/content/drive/Othercomputers/내 컴퓨터/workspace/ml_cartraffic'
    test_df = pd.read_csv(root_dir + '/data/test.csv')

    df_train = pd.read_csv(root_dir + '/data/train.csv')
    df_valid = pd.read_csv(root_dir + '/data/validate.csv')
    total_df = pd.concat((df_train.iloc[:-24 * 7,:], df_valid), axis=0)
    pre_df = preprocess_dataframe(total_df)

    # Prepend the second-to-last week of validate to the test frame so the
    # dataset sees three consecutive 168-row blocks.
    test_df_ = pd.concat((df_valid.iloc[-(24*7*2):-24*7,:], test_df.iloc[:]),axis=0)

    # Scale value columns only (first two columns are date / hour).
    scaler = MinMaxScaler()
    scaler.fit(pre_df.iloc[:,2:])
    test_df_ = scaler.transform(test_df_.iloc[:,2:])

    test_dataset = MyTestDataset(test_df_, CFG['road_number'])

    test_loader = DataLoader(
        test_dataset,
        batch_size=1,
        num_workers=0,
        shuffle=False,
        pin_memory=use_cuda,
        drop_last=False)

    model = MyModel(input_size=CFG['input_size'],
                    hidden_size=CFG['hidden_size'],
                    output_size=CFG['output_size'],
                    num_layers=CFG['num_layers'])

    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    checkpoint = torch.load(pths[number], map_location=device)
    model.load_state_dict(checkpoint['model'])
    model.to(device)
    model.eval()  # inference mode for dropout-like layers

    preds = []

    # No gradients are needed for prediction.
    with torch.no_grad():
        for src, _ in test_loader:
            state_shape = (model.lstm1.num_layers, src.size(0), model.hidden_size)
            h_in = torch.zeros(state_shape, device=device)
            c_in = torch.zeros(state_shape, device=device)

            src = src.to(device).float()
            pred, _ = model(src, h_in, c_in)

            preds.append(pred)
    return preds, scaler

In [None]:
# Checkpoint paths (sorted by file name) and the submission template.
# NOTE(review): this directory differs from the 'save_dir' used in setting()
# ('lstm_2week_minmax_swish') -- confirm which experiment is meant.
root_dir = '/content/drive/Othercomputers/내 컴퓨터/workspace/ml_cartraffic'
pths = sorted(glob(root_dir + '/lstm_2week_minmax_since4/*'))

submission_table = pd.read_csv(root_dir + '/data/sample_submission.csv')

# NOTE(review): the loop below is commented out, but it is the only place that
# defines `scaler` and fills `submission_table` with predictions. Later cells
# use both, so on a fresh kernel they fail or post-process the unmodified
# template unless this loop is run.
# for i in range(35):
#     preds, scaler = infer_setting(i, pths)
#     submission_table[submission_table.columns[i+1]] = preds[0][0].detach().cpu().numpy()

## RMSE 계산

In [None]:
# Average the validation RMSE encoded in the checkpoint file names
# ("<road>_<rmse>_<experiment>.pth").
summation = 0
cnt = 0
for path in pths:
    file_name = os.path.basename(path)
    if 'best' in file_name:
        # "<road>_best.pth" is an unrenamed checkpoint with no RMSE in its
        # name; parsing it with float() would raise ValueError.
        continue
    summation += float(file_name.split('_')[1])
    cnt += 1
# Divide by the number of files actually parsed instead of a hard-coded 35.
summation / cnt if cnt else float('nan')

2352.378571428571

In [None]:
# Map the scaled predictions (all columns after the first) back to original units.
# NOTE(review): `scaler` is only assigned by the commented-out inference loop
# in an earlier cell -- confirm that loop has been run.
scale_submission = scaler.inverse_transform(submission_table.iloc[:,1:])
scale_submission

In [None]:
# Replace every non-positive prediction with 1.
scale_submission_ = np.where(scale_submission <= 0, 1, scale_submission)

In [None]:
# Wrap the array in a DataFrame that reuses the template's value-column names.
df_values = pd.DataFrame(scale_submission_, columns=submission_table.columns[1:])

In [None]:
# Re-attach the template's first column to the post-processed values.
submission = pd.concat((submission_table.iloc[:,:1].reset_index(drop=True), 
           df_values.reset_index(drop=True)), axis=1)

In [None]:
# Write the LSTM submission file (without the DataFrame index).
submission.to_csv(root_dir + '/submission_lstm_2week_2layer_since4.csv', index=False)

# MLP approach

## Preprocess

In [None]:
def scaling(df, y_cols, scaler=None):
    """Scale the ``y_cols`` columns of ``df`` in place.

    Args:
        df: DataFrame to modify (the passed object itself is mutated).
        y_cols: Columns to scale.
        scaler: Already fitted scaler. When ``None`` a new ``StandardScaler``
            is fitted on ``df[y_cols]``.

    Returns:
        ``(df, scaler)`` when a new scaler was fitted, otherwise just ``df``.
    """
    fit_new = scaler is None
    if fit_new:
        scaler = StandardScaler()
        scaled = scaler.fit_transform(df[y_cols])
    else:
        scaled = scaler.transform(df[y_cols])
    df[y_cols] = scaled
    return (df, scaler) if fit_new else df

def inverse_scaling(df, y_cols, scaler=None):
    """Undo scaling on ``df``.

    First tries to inverse-transform only the ``y_cols`` columns in place. If
    that fails (for example ``df`` is a plain array that cannot be indexed by
    column name), the whole object is passed to ``scaler.inverse_transform``.

    Args:
        df: DataFrame (mutated in place) or array-like.
        y_cols: Columns to inverse-transform when ``df`` is a DataFrame.
        scaler: Fitted scaler exposing ``inverse_transform``.

    Returns:
        The DataFrame, or whatever ``scaler.inverse_transform(df)`` returns in
        the fallback path.
    """
    try:
        df[y_cols] = scaler.inverse_transform(df[y_cols])
    except Exception:
        # Catch ordinary errors only: a bare ``except`` would also swallow
        # KeyboardInterrupt / SystemExit and then retry.
        df = scaler.inverse_transform(df)
    return df

def col_to_date(csv_path):
    """Read a CSV and normalise its date / hour columns.

    Renames '날짜' to 'Date' and '시간' to 'Hour', then parses 'Date' from the
    ``YYYYMMDD`` format into datetimes.

    Args:
        csv_path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        The loaded DataFrame with the renamed and parsed columns.
    """
    renamed = pd.read_csv(csv_path).rename(columns={'날짜': 'Date', '시간': 'Hour'})
    renamed['Date'] = pd.to_datetime(renamed['Date'], format='%Y%m%d')
    return renamed
    
def preprocess_input_data(df, y_cols, d_hour=168, steps=1, train=True):
    """Add lagged copies of the target columns and list the feature columns.

    For every lag in ``[d_hour, d_hour + steps)`` and every target ``y`` a
    column named ``f'{y}D-{lag}'`` holding ``df[y].shift(lag)`` is added to the
    passed DataFrame (in place).

    Args:
        df: DataFrame containing the target columns.
        y_cols: Target column names.
        d_hour: Smallest lag, in rows.
        steps: Number of consecutive lags to create.
        train: When True, rows containing any NaN are dropped from the result.

    Returns:
        ``(df, x_cols)`` where ``x_cols`` are all columns that are not targets
        and whose name contains neither 'Date' nor 'Num'.
    """
    for lag in range(d_hour, d_hour + steps):
        for target in y_cols:
            df[f'{target}D-{lag}'] = df[target].shift(lag)

    x_cols = [
        col for col in df.columns
        if not (col in y_cols or 'Date' in col or 'Num' in col)
    ]
    result = df.dropna(axis=0) if train else df
    return result, x_cols

def kf_split(data, fold, n_split, seed, shuffle=True):
    """Return the train / validation indices of one fold of a K-fold split.

    Args:
        data: Samples to split (only its length matters to ``KFold``).
        fold: Zero-based index of the fold to return, ``0 <= fold < n_split``.
        n_split: Number of folds.
        seed: Random state used when ``shuffle`` is True.
        shuffle: Whether to shuffle before splitting.

    Returns:
        ``(train_index, valid_index)`` index arrays for the requested fold.

    Raises:
        ValueError: If ``fold`` is outside ``[0, n_split)``. Previously this
            surfaced as an ``UnboundLocalError`` at the return statement.
    """
    if not 0 <= fold < n_split:
        raise ValueError(f"fold must be in [0, {n_split}), got {fold}")

    if shuffle:
        kf = KFold(n_splits=n_split, random_state=seed, shuffle=True)
    else:
        kf = KFold(n_splits=n_split)

    for i, (train_index, valid_index) in enumerate(kf.split(data)):
        if i == fold:
            # Stop at the requested fold; later folds are not needed.
            return train_index, valid_index

    # Only reachable if the splitter yields fewer folds than n_split.
    raise ValueError(f"fold {fold} was not produced by the splitter")

In [None]:
# Target columns are every column after the first two (date, hour).
y_cols = pd.read_csv('./data/train.csv').columns[2:]
train_df = col_to_date('./data/train.csv')
valid_df = col_to_date('./data/validate.csv')

# Stack train and validate, keeping the first occurrence of each (Date, Hour).
total_df = pd.concat([train_df, valid_df], axis=0)
total_df = total_df.drop_duplicates(subset=['Date', 'Hour']).reset_index().drop(['index'], axis=1)

# Outlier handling: rows where column '10' is below 100 have all targets
# blanked and then forward-filled from the previous row.
total_df.loc[total_df['10'] < 100, y_cols] = np.nan
total_df = total_df.fillna(method='ffill')
total_df["DayNum"] = total_df.Date.dt.weekday

# Add rows for the hours that are missing on two dates.
# day_num is the weekday (as in DayNum above) whose averages are used to fill.
_feb = {'date': '2020-02-29', 'day_num': 5, 'hours': range(13, 24, 1)}
_mar = {'date': '2020-03-30', 'day_num': 0, 'hours': range(2, 24, 1)}

# Per-(weekday, hour) mean of the data.
# NOTE(review): the exact columns produced by applying np.mean to each group
# depend on the pandas version (the frame still contains 'Date') -- confirm the
# result lines up with total_df.columns, which is assumed further down.
group_data = total_df.groupby(['DayNum', 'Hour']).apply(lambda x: np.mean(x))

# Build one synthetic row per missing hour: [date] + the group means.
feb, mar = [], []
for hour in _feb['hours']:
    temp_array = np.array([datetime.strptime(_feb['date'], '%Y-%m-%d')])
    temp_array = np.append(temp_array, group_data.loc[_feb['day_num']].loc[hour].values)
    feb.append(temp_array)

for hour in _mar['hours']:
    temp_array = np.array([datetime.strptime(_mar['date'], '%Y-%m-%d')])
    temp_array = np.append(temp_array, group_data.loc[_mar['day_num']].loc[hour].values)
    mar.append(temp_array)

feb_df = pd.DataFrame(feb, columns=total_df.columns)
mar_df = pd.DataFrame(mar, columns=total_df.columns)

# Append the synthetic rows and restore chronological order.
total_df = pd.concat([total_df, feb_df, mar_df], axis=0)
total_df['Date'] = pd.to_datetime(total_df['Date'])
total_df = total_df.sort_values(['Date', 'Hour'], ascending=[True, True])
total_df = total_df.reset_index().drop(['index'], axis=1)

# Keep only the test rows whose column '10' equals -999.
# NOTE(review): presumably -999 marks the rows to be predicted -- confirm.
test_df = col_to_date('./data/test.csv')
test_df = test_df[test_df['10'] == -999].reset_index().drop(['index'], axis=1)

# train / test data
final_train_df = total_df.copy()
final_test_df = test_df.copy()

## Train

In [None]:
class CustomDataset(Dataset):
    """Dataset pairing feature rows with target rows by position."""

    def __init__(self, x_data, y_data):
        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return ``(features, targets)`` for row ``idx`` as float32 tensors."""
        features = torch.tensor(self.x_data[idx]).float()
        targets = torch.tensor(self.y_data[idx]).float()
        return features, targets

class MLP(nn.Module):
    """Multilayer perceptron with two 32-unit ReLU hidden layers.

    Args:
        in_features: Size of the input vector.
        out_features: Size of the output vector.
    """

    def __init__(self, in_features, out_features=35):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 32)
        self.fc2 = nn.Linear(32, 32)
        self.head = nn.Linear(32, out_features)
        self.act = nn.ReLU()

    def forward(self, x):
        """Map ``(..., in_features)`` inputs to ``(..., out_features)`` outputs."""
        hidden = self.act(self.fc1(x))
        hidden = self.act(self.fc2(hidden))
        return self.head(hidden)

class RMSELoss(nn.Module):
    """Root-mean-squared-error loss built on ``nn.MSELoss``."""

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return the scalar RMSE between predictions ``yhat`` and targets ``y``."""
        mean_squared = self.mse(yhat, y)
        return mean_squared.sqrt()

In [None]:
def get_model(in_f, out_f, lr=3e-4):
    """Create the MLP (on the global ``device``), its RMSE loss and an Adam optimizer.

    Args:
        in_f: Number of input features.
        out_f: Number of outputs.
        lr: Adam learning rate.

    Returns:
        ``(model, loss_fn, optimizer)``.
    """
    model = MLP(in_features=in_f, out_features=out_f).to(device)
    loss_fn = RMSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    return model, loss_fn, optimizer

def train_epoch(model, optimizer, loss_fn, loader):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module to train.
        optimizer: Optimizer stepped after every batch.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        loader: Iterable of ``(x, y)`` batches.

    Returns:
        The mean of the per-batch losses, as a tensor detached from the graph.
    """
    model.train()
    losses = 0.
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        outputs = model(x)

        loss = loss_fn(outputs, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a detached copy: adding the live loss tensor would chain
        # every batch's autograd graph into the running sum and keep them all
        # in memory until the epoch ends.
        losses += loss.detach()
    return losses/len(loader)

def validate(model, loss_fn, loader, scaler):
    """Evaluate ``model`` on ``loader`` in scaled and original units.

    Args:
        model: Module to evaluate.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        loader: Iterable of ``(x, y)`` batches.
        scaler: Fitted scaler whose ``inverse_transform`` maps outputs and
            targets back to original units.

    Returns:
        ``(mean_loss, mean_scale_loss)``: per-batch loss averaged over all
        batches, on the scaled values and on the inverse-transformed values.
    """
    model.eval()
    losses = scale_losses = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)

            loss = loss_fn(outputs, y)

            # Same loss, computed after undoing the target scaling.
            scale_outputs = torch.FloatTensor(scaler.inverse_transform(outputs.cpu()))
            scale_y = torch.FloatTensor(scaler.inverse_transform(y.cpu()))
            scale_loss = loss_fn(scale_outputs, scale_y)

            losses += loss
            scale_losses += scale_loss
    # Average the accumulated sum; the previous code divided only the LAST
    # batch's loss by the number of batches.
    return losses/len(loader), scale_losses/len(loader)

def run_epoch(epochs, train_loader, valid_loader, in_f, out_f, y_scaler, init=None, lr=3e-4, wandb=None):
    """Train a fresh MLP and return the weights with the lowest validation loss.

    Args:
        epochs: Number of epochs to run.
        train_loader, valid_loader: Batch iterables for training / validation.
        in_f, out_f: Input and output sizes of the MLP.
        y_scaler: Scaler passed to ``validate`` for the original-unit metric.
        init: Optional scheme name forwarded to ``init_weight``.
        lr: Learning rate.
        wandb: Optional logger exposing ``log(dict)``.

    Returns:
        A deep copy of the best ``state_dict``, or ``None`` if no epoch
        produced a validation loss below infinity.
    """
    start = time.time()
    model, loss_fn, optimizer = get_model(in_f, out_f, lr=lr)
    best_state = None
    lowest_valid = float('inf')

    if init is not None:
        init_weight(model, kind=init)

    for i in range(epochs):
        train_loss = train_epoch(model, optimizer, loss_fn, train_loader)
        valid_loss, scale_loss = validate(model, loss_fn, valid_loader, y_scaler)

        # Snapshot the weights whenever validation improves.
        if valid_loss < lowest_valid:
            lowest_valid = valid_loss
            best_state = deepcopy(model.state_dict())

        learning_rate = optimizer.param_groups[0]['lr']
        if wandb is not None:
            wandb.log({
                'train loss': train_loss,
                'valid loss': valid_loss,
                'RMSE': scale_loss,
            })
        print(f'epoch {i+1 :2d} train {train_loss :.4f} valid {valid_loss :.4f} {scale_loss :.0f} LR {learning_rate :.5f} TIME {time.time() - start :.2f}s')
    return best_state

In [None]:
# Scale the targets, then add the lag features (targets shifted by 168 rows).
df = final_train_df.copy()
df, y_scaler = scaling(df, y_cols)
df, x_cols = preprocess_input_data(df, y_cols, d_hour=168, steps=1)

x_data = df[x_cols].values
y_data = df[y_cols].values

# Use fold 1 of a shuffled 5-fold split as the validation set.
train_index, valid_index = kf_split(x_data, 1, 5, seed=2022)
x_train, y_train = x_data[train_index], y_data[train_index]
x_valid, y_valid = x_data[valid_index], y_data[valid_index]

train_set = CustomDataset(x_train, y_train)
valid_set = CustomDataset(x_valid, y_valid)

# Both loaders yield one sample per batch.
train_loader = DataLoader(train_set, batch_size=1, pin_memory=True, shuffle=True, drop_last=True)
valid_loader = DataLoader(valid_set, batch_size=1, pin_memory=True)

In [None]:
# Train the MLP; `best_model` is the state_dict returned by run_epoch.
epochs = 30
best_model = run_epoch(epochs, train_loader, valid_loader, in_f=len(x_cols), out_f=len(y_cols), y_scaler=y_scaler)

## Inference

In [None]:
def inference(state_dict: dict, x_test: np.array, x_cols):
    """Run the MLP on ``x_test`` with the given weights.

    A new ``MLP`` with ``len(x_cols)`` inputs and 35 outputs is built on every
    call and loaded from ``state_dict``.

    Args:
        state_dict: Weights compatible with ``MLP(len(x_cols), 35)``.
        x_test: Feature array of shape ``(n_samples, len(x_cols))``.
        x_cols: Feature column names; only their count is used.

    Returns:
        Tensor of predictions on the global ``device`` with no autograd graph
        attached.
    """
    model = MLP(in_features=len(x_cols), out_features=35)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    # Prediction only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(torch.FloatTensor(x_test).to(device))
    return logits

In [None]:
# Give every training row the same index label (9999) so that, after
# concatenation, the labels 0..167 refer to test rows only.
# NOTE(review): 3480 is hard-coded and must equal len(final_train_df) -- confirm.
s = pd.Series([9999] * 3480)
_train = final_train_df.copy().set_index(s)
_train = scaling(_train, y_cols, y_scaler)

# Append the test rows after the scaled training rows and add the lag
# features; train=False keeps rows that contain NaN.
_test = final_test_df.copy()
_test = pd.concat([_train, _test], axis=0)
_test, _ = preprocess_input_data(_test, y_cols, train=False)

# Predict the 168 test rows one at a time and write each prediction back
# into the frame.
in_test = _test.copy()
for v in range(168):
    test_pred = inference(best_model, in_test.loc[v][x_cols].values.astype('float64').reshape(1, -1), x_cols)
    in_test.loc[v, y_cols] = test_pred.squeeze().detach().cpu().numpy()

# Back to original units, then keep the rows labelled 0..167.
out_test = in_test.copy()
out_test = inverse_scaling(out_test, y_cols, y_scaler)
result = out_test[y_cols].loc[0:167]

# Copy the predictions into the submission template row by row.
submission = pd.read_csv('./data/sample_submission.csv')
for i in range(168):
    submission.loc[i, y_cols] = result.loc[i].values

submission.to_csv('./_submission.csv', index=False)

# Ensemble

In [None]:
# Ensemble: element-wise mean of two submission files (all columns after the
# first); the first column is taken from df1.
df1 = pd.read_csv('/content/drive/Othercomputers/내 컴퓨터/workspace/ml_cartraffic/submission_lstm_2week_2layer_since4.csv')
df2 = pd.read_csv('/content/drive/Othercomputers/내 컴퓨터/workspace/ml_cartraffic/7_2760.1_submission.csv')
timestamp = df1.iloc[:,0]
value = (df1.iloc[:,1:] + df2.iloc[:,1:]) * 0.5
pd.concat((timestamp, value), axis=1).to_csv('/content/drive/Othercomputers/내 컴퓨터/workspace/ml_cartraffic/ensemble.csv', index=False)