In [1]:
import warnings
warnings.filterwarnings(action='ignore')

import os
import gc
import math
import random
import pickle
import pandas as pd
import numpy as np
import multiprocessing
from tqdm.auto import tqdm

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler

from transformers import get_cosine_schedule_with_warmup

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset, sampler

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [3]:
random_seed = 41

def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed) 
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    os.environ["PYTHONHASHSEED"] = str(seed)
    
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

seed_everything(seed=random_seed) # Seed 고정

In [4]:
train = pd.read_csv("./data/df_train.csv")
test = pd.read_csv("./data/df_test.csv")
train.shape, test.shape

((262, 48), (175, 47))

In [5]:
y = torch.LongTensor(train['class'].values)
X = train.drop(['id', 'class'], axis=1).to_numpy()
X_test = test.drop(['id'], axis=1).to_numpy()
y

tensor([1, 2, 1, 0, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 0, 0, 0, 0, 2, 1, 1, 0, 1,
        2, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 2, 1, 0, 1, 1, 1, 1, 1, 0, 2, 1, 1, 0,
        2, 0, 0, 2, 0, 2, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 0, 2, 2, 0, 1, 2, 2, 2,
        0, 1, 1, 1, 2, 1, 2, 0, 1, 1, 2, 0, 2, 0, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1,
        2, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2,
        1, 1, 2, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 2, 0, 1, 2, 0, 2, 1, 1, 2, 2, 1,
        1, 1, 2, 2, 0, 1, 0, 2, 2, 2, 1, 0, 1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 0,
        0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2, 0, 2, 2, 1, 1, 2, 1, 2, 2, 1,
        1, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 0, 2, 2, 1, 2, 1, 2, 1, 0, 0, 1, 2,
        0, 0, 2, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 2, 2, 1, 2, 1, 2, 1, 0, 2, 0,
        0, 2, 2, 1, 1, 1, 0, 1, 0, 1, 0, 2, 1, 0, 1, 1, 1, 1, 2, 0, 0, 1])

In [6]:
y2 = nn.functional.one_hot(y, num_classes=3).to(device).long()

In [7]:
total = np.concatenate([X, X_test], axis=0)
total.shape

(437, 46)

In [8]:
scaler = StandardScaler()
total = scaler.fit_transform(total)
# total = np.expand_dims(total, axis=1)
total.shape

(437, 46)

In [9]:
class Encoder(nn.Module):
    def __init__(self, n_features, latent_dim):
        super().__init__()
        
        self.lstm0 = nn.LSTM(
            input_size=n_features,
            hidden_size=latent_dim**2,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )
        self.lstm1 = nn.LSTM(
            input_size=latent_dim**2,
            hidden_size=latent_dim**3,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )

        self.lstm2 = nn.LSTM(
            input_size=latent_dim**3,
            hidden_size=latent_dim*3,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )
        
        self.lstm3 = nn.LSTM(
            input_size=latent_dim*3,
            hidden_size=latent_dim,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )

    def forward(self, x):
        x1, (_, _) = self.lstm0(x)
        x2, (_, _) = self.lstm1(x1)
        x3, (_, _) = self.lstm2(x2)
        x4, (h_n, _) = self.lstm3(x3)
        
        return h_n.permute(1, 0, 2)


class Decoder(nn.Module):
    def __init__(self, n_features, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim
        
        self.lstm0 = nn.LSTM(
            input_size=latent_dim,
            hidden_size=latent_dim*3,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )
        self.lstm1 = nn.LSTM(
            input_size=latent_dim*3,
            hidden_size=latent_dim**3,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )
        
        self.lstm2 = nn.LSTM(
            input_size=latent_dim**3,
            hidden_size=latent_dim**2,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )
        
        self.lstm3 = nn.LSTM(
            input_size=latent_dim**2,
            hidden_size=latent_dim*2,
            num_layers=1,
            batch_first=True,
            bidirectional=False
        )        
        
        
        self.linear = nn.Linear(in_features=latent_dim*2, out_features=n_features)

    def forward(self, x):
        x = x.repeat(1, 31, 1)
        
        x, (_, _) = self.lstm0(x)
        x, (_, _) = self.lstm1(x)
        x, (_, _) = self.lstm2(x)
        x, (_, _) = self.lstm3(x)            
        
        x = self.linear(x)

        return x


class AutoEncoder(nn.Module):
    def __init__(self, n_features=46, latent_dim=6, device=None):
        super().__init__()
        
        self.encoder = Encoder(n_features, latent_dim).to(device)
        self.decoder = Decoder(n_features, latent_dim).to(device)

    def forward(self, x):
        x1 = self.encoder(x)
        x2 = self.decoder(x1)
        
        return x2

In [10]:
class Encoder(nn.Module):
    def __init__(self, n_features, latent_dim):
        super().__init__()
        
        self.lstm0 = nn.Linear(n_features, latent_dim**2)
        self.lstm1 = nn.Linear(latent_dim**2, latent_dim*3)        
        self.lstm2_1 = nn.Linear(latent_dim*3, 3)
        self.lstm2_2 = nn.Linear(latent_dim*3, latent_dim-3)

    def forward(self, x):
        x1 = self.lstm0(x)
        x2 = self.lstm1(x1)
        x3_1 = self.lstm2_1(x2)
        x3_2 = self.lstm2_2(x2)
        
        return x3_1, x3_2


class Decoder(nn.Module):
    def __init__(self, n_features, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim
        
        self.lstm0 = nn.Linear(latent_dim, latent_dim*3)
        self.lstm1 = nn.Linear(latent_dim*3, latent_dim**2)        
        self.lstm2 = nn.Linear(latent_dim**2, latent_dim*2)        
        
        
        self.linear = nn.Linear(in_features=latent_dim*2, out_features=n_features)

    def forward(self, x):        
        x = self.lstm0(x)
        x = self.lstm1(x)
        x = self.lstm2(x)
        
        x = self.linear(x)

        return x


class AutoEncoder(nn.Module):
    def __init__(self, n_features=46, latent_dim=12, device=None):
        super().__init__()
        
        self.encoder = Encoder(n_features, latent_dim).to(device)
        self.decoder = Decoder(n_features, latent_dim).to(device)

    def forward(self, x):
        x1_1, x1_2 = self.encoder(x)
        x1 = torch.concat([x1_1, x1_2], axis=1)
        x2 = self.decoder(x1)
        
        return x1_1, x2

In [11]:
CFG = {
    'EPOCHS':30,
    'LEARNING_RATE':0.001,
    'BATCH_SIZE':437,
}

In [12]:
def validation(model, criterion, test_loader, device):
    model.eval()
    
    model_preds = []
    true_labels = []
    
    val_loss = []

    with torch.no_grad():
        for x, label in iter(test_loader):
            x, label = x.to(device), label.to(device)

            model_pred = model(x)

            loss = criterion(model_pred, label)

            val_loss.append(loss.item())

    return np.mean(val_loss)

In [13]:
# DataLoader 정의
train_dataset = TensorDataset(torch.from_numpy(total).type(torch.float), torch.from_numpy(total).type(torch.float))
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=2, worker_init_fn=seed_worker)

# 학습 모델 설정
model = AutoEncoder().to(device)

criterion1 = nn.CrossEntropyLoss().to(device)
criterion2 = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])  # Adam
scheduler = get_cosine_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=len(train_loader) * int(CFG['EPOCHS']*0.5),
    num_training_steps=len(train_loader) * CFG['EPOCHS']
)


best_score = 100

# train
torch.backends.cudnn.benchmark = True
for epoch in range(1,CFG['EPOCHS']+1):
    model.train()
    train_loss = []
    for x, label in iter(train_loader):            
        x, label = x.to(device), label.to(device)

        optimizer.zero_grad()

        pred_label, pred_vector = model(x)
        target = pred_label[:len(y2), :3] # 앞의 세 개를 label로 유도
        target2 = torch.softmax(target, axis=1)    

        loss1 = criterion1(y2.float(), target2)
        loss2 = criterion2(label, pred_vector)
        
        loss = loss1 + loss2
        loss.backward()
        optimizer.step()
        scheduler.step()

        train_loss.append(loss.item())

    tr_loss = np.mean(train_loss)

    print(f'Epoch [{epoch}], Loss1 : {loss1:.6f}, Loss2 : {loss2:.6f}, Train Loss : [{tr_loss:.5f}]')
    # print(f'Epoch [{epoch}], Train Loss : [{tr_loss:.5f}]')

    if best_score > tr_loss:
        torch.save(model.state_dict(), f'./models/AutoEncoder_total.pt')
        best_score = tr_loss

Epoch [1], Loss1 : 1.224556, Loss2 : 1.017315, Train Loss : [2.24187]
Epoch [2], Loss1 : 1.232138, Loss2 : 1.017315, Train Loss : [2.24945]
Epoch [3], Loss1 : 1.229296, Loss2 : 1.016840, Train Loss : [2.24614]
Epoch [4], Loss1 : 1.227305, Loss2 : 1.015908, Train Loss : [2.24321]
Epoch [5], Loss1 : 1.229455, Loss2 : 1.014587, Train Loss : [2.24404]
Epoch [6], Loss1 : 1.224903, Loss2 : 1.012890, Train Loss : [2.23779]
Epoch [7], Loss1 : 1.227379, Loss2 : 1.010896, Train Loss : [2.23828]
Epoch [8], Loss1 : 1.224916, Loss2 : 1.008608, Train Loss : [2.23352]
Epoch [9], Loss1 : 1.222606, Loss2 : 1.006042, Train Loss : [2.22865]
Epoch [10], Loss1 : 1.219809, Loss2 : 1.003129, Train Loss : [2.22294]
Epoch [11], Loss1 : 1.226105, Loss2 : 0.999765, Train Loss : [2.22587]
Epoch [12], Loss1 : 1.224986, Loss2 : 0.995720, Train Loss : [2.22071]
Epoch [13], Loss1 : 1.220516, Loss2 : 0.990626, Train Loss : [2.21114]
Epoch [14], Loss1 : 1.221018, Loss2 : 0.983960, Train Loss : [2.20498]
Epoch [15], Los