In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

import torchvision.models as models
import torchvision.datasets as dset
import torchvision.transforms as transforms

from random import *
import os
import cv2
import copy
import time

from sklearn.model_selection import train_test_split

In [2]:
train = pd.read_csv('train.csv')
test  = pd.read_csv('test.csv')
submission = pd.read_csv('submission.csv')

In [3]:
train.head()

Unnamed: 0,id,digit,letter,0,1,2,3,4,5,6,...,774,775,776,777,778,779,780,781,782,783
0,1,5,L,1,1,1,4,3,0,0,...,2,1,0,1,2,4,4,4,3,4
1,2,0,B,0,4,0,0,4,1,1,...,0,3,0,1,4,1,4,2,1,2
2,3,4,L,1,1,2,2,1,1,1,...,3,3,3,0,2,0,3,0,2,2
3,4,9,D,1,2,0,2,0,4,0,...,3,3,2,0,1,4,0,0,1,1
4,5,6,A,3,0,2,4,0,3,0,...,4,4,3,2,1,3,4,3,1,2


In [4]:
x_train = np.concatenate(
    [
        pd.get_dummies(train['letter']).values.reshape(-1, 1, 26),
        (train[[str(i) for i in range(784)]] / 255.).values.reshape(-1, 1, 784)
    ],
    axis=2
)
y_train = train['digit']

In [5]:
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=123)

In [6]:
print(x_train.shape)
print(x_valid.shape)
print(y_train.shape)
print(y_valid.shape)

(1638, 1, 810)
(410, 1, 810)
(1638,)
(410,)


In [7]:
# tensor로 형변환
x_train = torch.Tensor(x_train)
x_valid = torch.Tensor(x_valid)
y_train = torch.Tensor(y_train.values)
y_valid = torch.Tensor(y_valid.values)

In [8]:
train_data = TensorDataset(
    x_train[:, :, 26], # Letter
    x_train[:, :, 26:].reshape(-1, 1, 28, 28), # Image
    y_train # Digit
)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=32)

valid_data = TensorDataset(
    x_valid[:, :, :26],
    x_valid[:, :, 26:].reshape(-1, 1, 28, 28),
    y_valid
)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=32)

In [26]:
class customCNN(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Letter의 Convolution Block
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 16, 3, padding=1), nn.ReLU(),
            nn.Conv1d(16, 64, 3, padding=1), nn.ReLU(),
            nn.Conv1d(64, 128, 3, padding=1), nn.ReLU(),
            nn.Conv1d(128, 64, 3, padding=1), nn.ReLU(),
            nn.Conv1d(64, 16, 3, padding=1), nn.ReLU(),
        )
        
        # Image의 Convolution Block
        self.conv2 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 128, 3, padding=1), nn.ReLU(),
            nn.Conv2d(128, 256, 3, padding=1), nn.ReLU(),
            nn.Conv2d(256, 512, 3, padding=1), nn.ReLU(),
            nn.Conv2d(512, 256, 3, padding=1), nn.ReLU(),
            nn.Conv2d(256, 128, 3, padding=1), nn.ReLU(),
            nn.Conv2d(128, 64, 3, padding=1), nn.ReLU(),
            nn.Conv2d(64, 32, 3, padding=1), nn.ReLU(),
        )
        
        '''
            Output Size = (W - F + 2P) / S + 1

            W: input_volume_size
            F: kernel_size
            P: padding_size
            S: strides
        '''
        16 - 3 + 2*1 / 1 + 1 = 16
        channel = 16, height = 16 => 3*3*16 + 3*3*16 = 288
        
        32 - 5 + 2*3 / 1 + 1 = 34
        channel = 32, height = 34 => 5*5*32 + 5*5*34 = 1650
        
        # conv1 -> (16 - 3 + 2*1) / 1 + 1 = 
        # conv2 -> (32 - 3 + 2*1) / 1 + 1 = 
        self.out = nn.Sequential(
            nn.Linear(32 * 3 * 3 + 16 * 3 * 3, 128), nn.ReLU(),
            nn.Linear(128, 32), nn.ReLU(),
            nn.Linear(32, 10)
        )
        
        self.loss = nn.CrossEntropyLoss()
    
    def _inference(self, x1, x2):
        bsz = x1.size(0)
        x1 = self.conv1(x1)
        x2 = self.conv2(x2)
        x1 = x1.view(bsz, -1)
        x2 = x2.view(bsz, -1)
        
        x = torch.cat([x1, x2], dim=1)
        out = torch.nn.functional.softmax(self.out(x), dim=1)
    
    def forward(self, x1, x2, label=False):
        out = self._inference(x1, x2)
        if label is not False:
            loss = self.loss(out, label)
            return (out, loss)
        
        return out

In [27]:
model = customCNN()
model.cuda()

customCNN(
  (conv1): Sequential(
    (0): Conv1d(1, 16, kernel_size=(3,), stride=(1,))
    (1): ReLU()
    (2): Conv1d(16, 64, kernel_size=(3,), stride=(1,))
    (3): ReLU()
    (4): Conv1d(64, 128, kernel_size=(3,), stride=(1,))
    (5): ReLU()
    (6): Conv1d(128, 64, kernel_size=(3,), stride=(1,))
    (7): ReLU()
    (8): Conv1d(64, 16, kernel_size=(3,), stride=(1,))
    (9): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 128, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU()
    (4): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
    (5): ReLU()
    (6): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1))
    (9): ReLU()
    (10): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1))
    (11): ReLU()
    (12): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1))
    (13): ReLU()
    (14): Conv2d(64, 32, kernel_size=(3, 3), 

In [28]:
x1 = x_train[:32, :, :26].cuda()
x2 = x_train[:32, :, 26:].reshape(-1, 1, 28, 28).cuda()

In [29]:
print(model(x1, x2))

RuntimeError: mat1 dim 1 must match mat2 dim 0