In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

import torchvision.models as models
import torchvision.datasets as dset
import torchvision.transforms as transforms

import os
import cv2
import copy
import time
from random import *
from collections import defaultdict

from sklearn.model_selection import train_test_split

In [49]:
# Load the training table, the test table and the submission template from the working directory
train, test, submission = map(
    pd.read_csv,
    ('train.csv', 'test.csv', 'submission.csv'),
)

In [50]:
# Per-row feature array of shape (rows, 1, 26 + 784):
# the one-hot encoded `letter` column followed by the 784 pixel columns divided by 255
x_train = np.concatenate(
    (
        pd.get_dummies(train['letter']).to_numpy().reshape(-1, 1, 26),
        train[[str(i) for i in range(784)]].div(255.).to_numpy().reshape(-1, 1, 784),
    ),
    axis=-1,
)
# Classification target
y_train = train['digit'].to_numpy()

In [51]:
# Hold out 20% of the rows for validation (fixed random_state for a repeatable split).
# NOTE(review): this cell rebinds x_train / y_train to the split result, so running it a
# second time without re-running the previous cell splits the already-reduced training set again.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train,
    y_train,
    random_state=123,
    test_size=0.2,
)

In [52]:
# Convert the NumPy arrays to tensors: features via torch.Tensor, labels via torch.LongTensor
x_train, x_valid = map(torch.Tensor, (x_train, x_valid))
y_train, y_valid = map(torch.LongTensor, (y_train, y_valid))

In [53]:
# Each dataset item is (letter one-hot [1, 26], image [1, 28, 28], digit label).
# Training batches are drawn in random order; validation batches keep the stored order.
train_data = TensorDataset(
    x_train[..., :26],                          # first 26 features: letter one-hot
    x_train[..., 26:].reshape(-1, 1, 28, 28),   # remaining 784 features: image
    y_train,                                    # digit label
)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(dataset=train_data, batch_size=16, sampler=train_sampler)

valid_data = TensorDataset(
    x_valid[..., :26],
    x_valid[..., 26:].reshape(-1, 1, 28, 28),
    y_valid,
)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(dataset=valid_data, batch_size=16, sampler=valid_sampler)

In [108]:
class customCNN(nn.Module):
    """Two-branch CNN that classifies a digit from a one-hot letter and an image.

    The letter branch applies four Conv1d blocks to a ``(batch, 1, 26)`` tensor and
    the image branch applies five Conv2d blocks to a ``(batch, 1, 28, 28)`` tensor.
    Every block uses kernel 3 / padding 1 and a pooling layer of size 1, so the
    length / spatial size is never reduced. Both branches are flattened,
    concatenated and passed through a fully connected head with 10 outputs.

    Args:
        hidden_dim: Width of the first fully connected layer. The flattened input
            has 128 * 26 + 256 * 28 * 28 = 204032 features, so with the default of
            1024 this single layer holds about 209M weights (~797 MiB in float32,
            before gradients and optimizer state). Lower it if GPU memory is tight.

    Notes:
        * The module is built on the CPU; move it with ``model.to(device)`` or
          ``model.cuda()`` after construction.
        * The loss is computed from the raw logits, because ``nn.CrossEntropyLoss``
          applies log-softmax internally. The scores returned to the caller are
          still softmax probabilities.
        * The former ``layey2_6`` block (a misspelled, never-used 256->512
          convolution) is no longer created, so state dicts no longer contain its
          keys.
    """

    def __init__(self, hidden_dim=1024):
        super().__init__()

        # Convolution blocks of the letter branch
        self.layer1_1 = self.conv_module_1d(1, 16)
        self.layer1_2 = self.conv_module_1d(16, 32)
        self.layer1_3 = self.conv_module_1d(32, 64)
        self.layer1_4 = self.conv_module_1d(64, 128)

        # Convolution blocks of the image branch
        self.layer2_1 = self.conv_module_2d(1, 16)
        self.layer2_2 = self.conv_module_2d(16, 32)
        self.layer2_3 = self.conv_module_2d(32, 64)
        self.layer2_4 = self.conv_module_2d(64, 128)
        self.layer2_5 = self.conv_module_2d(128, 256)

        # Size of the concatenated, flattened branch outputs:
        # letter branch 128 channels x 26 positions, image branch 256 channels x 28 x 28.
        flat_features = 128 * 26 + 256 * 28 * 28  # 204032

        self.out = nn.Sequential(
            nn.Linear(flat_features, hidden_dim), nn.LeakyReLU(),
            nn.Linear(hidden_dim, 128), nn.LeakyReLU(),
            nn.Linear(128, 32), nn.LeakyReLU(),
            nn.Linear(32, 10)
        )

        self.loss = nn.CrossEntropyLoss()

    def forward(self, x1, x2, label=False):
        """Run the network and optionally compute the classification loss.

        Args:
            x1: Letter tensor of shape ``(batch, 1, 26)``.
            x2: Image tensor of shape ``(batch, 1, 28, 28)``.
            label: Class-index tensor of shape ``(batch,)``. ``False`` (the
                default) or ``None`` skips the loss computation.

        Returns:
            The ``(batch, 10)`` softmax probabilities when no label is given,
            otherwise the tuple ``(probabilities, loss)``.
        """
        logits = self._logits(x1, x2)
        out = torch.nn.functional.softmax(logits, dim=1)
        if label is False or label is None:
            return out

        # Cross-entropy must see the unnormalized logits, not the probabilities.
        loss = self.loss(logits, label)
        return (out, loss)

    def _logits(self, x1, x2):
        """Return the raw ``(batch, 10)`` class scores before softmax."""
        x1 = self.layer1_1(x1)
        x1 = self.layer1_2(x1)
        x1 = self.layer1_3(x1)
        x1 = self.layer1_4(x1)

        x2 = self.layer2_1(x2)
        x2 = self.layer2_2(x2)
        x2 = self.layer2_3(x2)
        x2 = self.layer2_4(x2)
        x2 = self.layer2_5(x2)

        # Flatten each branch to (batch, features) and join them feature-wise
        x1 = x1.view(x1.size(0), -1)
        x2 = x2.view(x2.size(0), -1)
        x = torch.cat([x1, x2], dim=1)

        return self.out(x)

    def _inference(self, x1, x2):
        """Return the ``(batch, 10)`` softmax probabilities for the given inputs."""
        return torch.nn.functional.softmax(self._logits(x1, x2), dim=1)

    def conv_module_1d(self, in_num, out_num):
        """Build a Conv1d -> BatchNorm1d -> LeakyReLU -> MaxPool1d(1) block."""
        return nn.Sequential(
            nn.Conv1d(in_num, out_num, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(out_num),
            nn.LeakyReLU(),
            nn.MaxPool1d(1)
        )

    def conv_module_2d(self, in_num, out_num):
        """Build a Conv2d -> BatchNorm2d -> LeakyReLU -> MaxPool2d((1, 1)) block."""
        return nn.Sequential(
            nn.Conv2d(in_num, out_num, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_num),
            nn.LeakyReLU(),
            nn.MaxPool2d((1, 1))
        )

In [109]:
# Release cached, unused GPU memory held by the PyTorch allocator
torch.cuda.empty_cache()
# Prefer the first GPU and fall back to the CPU when CUDA is unavailable
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = customCNN()
# Move the model to the device selected above instead of unconditionally calling .cuda()
model.to(device)

customCNN(
  (layer1_1): Sequential(
    (0): Conv1d(1, 16, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (layer1_2): Sequential(
    (0): Conv1d(16, 32, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (layer1_3): Sequential(
    (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (layer1_4): Sequential(
    (0): Conv1d(64, 128, ker

In [110]:
# Take the first 32 training rows as a small GPU batch for a manual forward-pass check
test_letter, test_image = (
    x_train[:32, :, :26].cuda(),
    x_train[:32, :, 26:].reshape(-1, 1, 28, 28).cuda(),
)

In [111]:
# Show the shapes of the letter batch and the image batch, one per line
print(test_letter.shape, test_image.shape, sep="\n")

torch.Size([32, 1, 26])
torch.Size([32, 1, 28, 28])


In [112]:
# Smoke test: one forward pass without labels; the cell displays the returned per-class scores
model(test_letter, test_image)

tensor([[0.1118, 0.1026, 0.0900, 0.0892, 0.1000, 0.0957, 0.0983, 0.0957, 0.0974,
         0.1193],
        [0.1136, 0.1092, 0.0880, 0.0872, 0.1012, 0.0941, 0.0948, 0.0957, 0.0991,
         0.1171],
        [0.1156, 0.1077, 0.0907, 0.0860, 0.0986, 0.0929, 0.0948, 0.0943, 0.0974,
         0.1220],
        [0.1142, 0.1053, 0.0922, 0.0871, 0.0964, 0.0950, 0.0971, 0.0970, 0.1001,
         0.1157],
        [0.1122, 0.1087, 0.0907, 0.0876, 0.0969, 0.0934, 0.1023, 0.0945, 0.1009,
         0.1129],
        [0.1156, 0.1069, 0.0892, 0.0855, 0.0978, 0.0958, 0.0991, 0.0950, 0.0985,
         0.1166],
        [0.1127, 0.1101, 0.0911, 0.0869, 0.0990, 0.0950, 0.0972, 0.0940, 0.1003,
         0.1135],
        [0.1157, 0.1082, 0.0909, 0.0875, 0.0948, 0.0938, 0.0971, 0.0955, 0.1010,
         0.1155],
        [0.1161, 0.1085, 0.0911, 0.0865, 0.0960, 0.0938, 0.1006, 0.0950, 0.0999,
         0.1126],
        [0.1161, 0.1082, 0.0895, 0.0885, 0.0967, 0.0953, 0.0973, 0.0954, 0.0988,
         0.1143],
        [0

In [113]:
# Seed NumPy and PyTorch (CPU and all CUDA devices).
# NOTE(review): `model` is constructed in an earlier cell, so this seeding does not
# cover the initial weights — move it above the model creation if that matters.
seed_val = 42
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Training configuration: number of epochs and the optimizer over all model parameters
epochs = 200
optimizer = optim.Adadelta(params=model.parameters(), lr=0.1)

In [114]:
# Select the GPU when one is available and fall back to the CPU otherwise, so the
# `.to(device)` calls in the training loop also work on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [115]:
# Accuracy helper
def flat_accuracy(preds, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        preds: 2-D array of per-class scores, one row per example.
        labels: Array of true class indices; it is flattened before comparison.

    Returns:
        Number of matching predictions divided by the number of labels.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    n_correct = np.sum(predicted_classes == true_classes)

    return n_correct / len(true_classes)

In [116]:
# Clear any stale gradients before training starts
model.zero_grad()

# Per-batch metrics, appended under the keys
# "train_loss", "train_acc", "eval_loss" and "eval_acc"
history = defaultdict(list)
for epoch_i in range(0, epochs):

    # Sample-weighted accumulators, so a smaller final batch does not get the
    # same weight as a full batch in the epoch average
    total_loss = 0.0
    n_train_examples = 0

    # Switch to training mode
    model.train()

    # Iterate over the training set one batch at a time
    for batch in train_dataloader:

        # Move the batch to the selected device
        batch = tuple(t.to(device) for t in batch)

        # Unpack the batch
        letter, image, label = batch

        # Forward pass: returns (class scores, loss) because a label is supplied
        outputs = model(letter, image, label)

        loss = outputs[1]

        # Read the scalar loss once to avoid a second device synchronisation
        loss_value = loss.item()
        batch_size = label.size(0)
        total_loss += loss_value * batch_size
        n_train_examples += batch_size

        # Backward pass
        loss.backward()
        history["train_loss"].append(loss_value)

        # Batch accuracy
        logits = outputs[0].detach().cpu().numpy()
        label = label.to("cpu").numpy()
        tmp_train_accuracy = flat_accuracy(logits, label)
        history["train_acc"].append(tmp_train_accuracy)

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update the weights from the gradients
        optimizer.step()

        # Reset the gradients for the next step
        model.zero_grad()

    # Average training loss per example (guarded against an empty loader)
    avg_train_loss = total_loss / max(n_train_examples, 1)

    # ========================================
    #               Validation
    # ========================================

    # Switch to evaluation mode
    model.eval()

    # Sample-weighted accuracy accumulators
    n_eval_correct, n_eval_examples = 0.0, 0

    # Iterate over the validation set one batch at a time
    for batch in valid_dataloader:

        # Move the batch to the selected device
        batch = tuple(t.to(device) for t in batch)

        # Unpack the batch
        letter, image, label = batch

        # No gradient tracking during validation
        with torch.no_grad():
            # Forward pass
            outputs = model(letter, image, label)

        logits = outputs[0]
        history["eval_loss"].append(outputs[1].item())

        # Move the results to the CPU
        logits = logits.detach().cpu().numpy()
        label = label.to("cpu").numpy()

        # Compare the predicted classes with the labels
        tmp_eval_accuracy = flat_accuracy(logits, label)
        history["eval_acc"].append(tmp_eval_accuracy)
        n_eval_correct += tmp_eval_accuracy * len(label)
        n_eval_examples += len(label)

    # Validation accuracy over all examples (guarded against an empty loader)
    valid_accuracy = n_eval_correct / max(n_eval_examples, 1)

    s = f"\r[Epoch {epoch_i+1}/{epochs}]"
    s += f" Avg Training Loss: {avg_train_loss: .2f}"
    s += " Valid Acc: {0:.2f}".format(valid_accuracy)
    print(s, end="")

print("")
print("Training complete")

RuntimeError: CUDA out of memory. Tried to allocate 798.00 MiB (GPU 0; 10.76 GiB total capacity; 8.48 GiB already allocated; 408.50 MiB free; 8.53 GiB reserved in total by PyTorch)

In [None]:
# Create the checkpoint directory first; torch.save fails if it does not exist
os.makedirs("./model", exist_ok=True)
torch.save(model.state_dict(), "./model/emnist_model4.pt")

In [None]:
# Rebuild the network and restore the saved weights for inference.
# map_location remaps the stored tensors onto `device`, so a checkpoint written
# from a GPU can also be restored on a machine without CUDA.
model = customCNN()
model.load_state_dict(torch.load("./model/emnist_model4.pt", map_location=device))
model.eval()
model.to(device)

In [None]:
# One-hot encode the test letters against the letter categories of the training
# table. Encoding the test column on its own would derive the columns from whichever
# letters happen to occur in the test set, which can produce fewer than 26 columns or
# columns that do not line up with the encoding used for training.
letter_categories = sorted(train["letter"].dropna().unique())
test_letters = pd.Categorical(test["letter"], categories=letter_categories)

x_test = np.concatenate(
    [
        pd.get_dummies(test_letters).values.reshape(-1, 1, 26),
        (test[[str(i) for i in range(784)]] / 255.).values.reshape(-1, 1, 784)
    ],
    axis=2
)
x_test = torch.Tensor(x_test)

# Split into the letter part and the image part and move both to the selected device
x1 = x_test[:, :, :26].to(device)
x2 = x_test[:, :, 26:].reshape(-1, 1, 28, 28).to(device)

In [None]:
# Serve the (letter, image) test pairs in their stored order, 32 at a time,
# so the predictions stay aligned with the rows of the test table
test_data = TensorDataset(x1, x2)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(dataset=test_data, batch_size=32, sampler=test_sampler)

In [None]:
# Collect the predicted class index for every test batch (no gradient tracking needed)
y_pred = []
with torch.no_grad():
    for batch in test_dataloader:
        input1, input2 = batch
        outputs = model(input1, input2)
        y_pred.append(outputs.argmax(dim=1))

In [None]:
# Join the per-batch predictions and store them in the submission table
submission["digit"] = torch.cat(y_pred).detach().cpu().numpy()
# Create the output directory first; to_csv fails if it does not exist
os.makedirs("./result", exist_ok=True)
submission.to_csv("./result/submission4.csv", index=False)