# CNN: Digit Recognition

## 1. Module and Dataset

In [2]:
# TensorFlow is imported only as a convenient way to obtain MNIST; both the
# module and the dataset handle are removed from the namespace once the
# arrays are in memory.
import tensorflow as tf
mnist = tf.keras.datasets.mnist

# Fetch the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixel values by 1/255 (true division yields float64 arrays).
x_train = x_train / 255.0
x_test = x_test / 255.0

del mnist, tf

In [3]:
# List what is left in the interactive namespace after the `del` statements of the loading cell.
%whos

Variable   Type       Data/Info
-------------------------------
x_test     ndarray    10000x28x28: 7840000 elems, type `float64`, 62720000 bytes (59.814453125 Mb)
x_train    ndarray    60000x28x28: 47040000 elems, type `float64`, 376320000 bytes (358.88671875 Mb)
y_test     ndarray    10000: 10000 elems, type `uint8`, 10000 bytes
y_train    ndarray    60000: 60000 elems, type `uint8`, 60000 bytes


In [4]:
import torch

# Device selection: use the first CUDA GPU when PyTorch reports one as
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    cuda0 = torch.device('cuda:0')
else:
    cuda0 = torch.device('cpu')

In [5]:
# Convert the loaded arrays to tensors with explicit dtypes: float32 images
# and int64 (long) labels. torch.as_tensor replaces the legacy
# torch.Tensor / torch.LongTensor type constructors and makes the target
# dtype visible at the call site; it is also a no-op when the cell is re-run
# on data that already has the requested dtype.

# Training dataset
x_train = torch.as_tensor(x_train, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.long)

# Test dataset
x_test = torch.as_tensor(x_test, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.long)

## 2. Modeling

In [11]:
import torch.nn as nn

# Dummy mini-batch, used only to trace tensor shapes through the layers.
# Layout: (batch size)   x (channels)             x (height) x (width)
#         (100 per batch) x (grayscale -> 1 channel) x (28)     x (28)
# torch.zeros is used instead of torch.Tensor(100, 1, 28, 28): the latter
# allocates uninitialized memory, so the values fed to the layers would be
# arbitrary (possibly NaN/inf).
inputs = torch.zeros(100, 1, 28, 28)
print('Shape of tensor : {}'.format(inputs.shape))

Shape of tensor : torch.Size([100, 1, 28, 28])


In [12]:
# First convolution: 1 input channel -> 32 output channels with a 3x3 kernel.
# Stride 1 with padding 1 keeps the spatial size unchanged for a 3x3 kernel.
#
# nn.Conv2d signature, for reference:
#   Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
#          dilation=1, groups=1, bias=True, padding_mode='zeros')
#
# It applies a 2D convolution over an input made of several input planes.
# For sample N_i and output channel C_out_j:
#   out(N_i, C_out_j) = bias(C_out_j)
#                       + sum_{k=0}^{C_in - 1} weight(C_out_j, k) (star) input(N_i, k)
conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
print(conv1)

Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [13]:
# Second convolution: 32 -> 64 channels, again 3x3 with padding 1 so the
# spatial size is preserved.
conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
print(conv2)

Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [14]:
# 2x2 max pooling. The stride is left at its default (None), which makes it
# equal to the kernel size, so each spatial dimension is halved.
#
# nn.MaxPool2d signature, for reference:
#   MaxPool2d(kernel_size, stride=None, padding=0, dilation=1,
#             return_indices=False, ceil_mode=False)
#
# It applies 2D max pooling over an input made of several input planes:
#   out(N_i, C_j, h, w) = max_{m=0..kH-1} max_{n=0..kW-1}
#                         input(N_i, C_j, stride[0]*h + m, stride[1]*w + n)
pool = nn.MaxPool2d(kernel_size=2)
print(pool)

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


In [22]:
# Push the dummy batch through conv -> pool -> conv -> pool -> flatten -> linear
# and print the tensor shape after every step.
print(50 * '-')
out = conv1(inputs)
print('After first conv : ', out.shape, sep='\n')

out = pool(out)
print('After first pooling : ', out.shape, sep='\n')
print(50 * '-')

out = conv2(out)
print('After second conv : ', out.shape, sep='\n')

out = pool(out)
print('After second pooling : ', out.shape, sep='\n')
print(50 * '-')


# Keep the batch axis (first dimension) and collapse everything else into
# one feature vector per sample.
out = out.view(len(out), -1)
print('After flatten : ', out.shape, sep='\n')

# 3136 is the flattened feature size printed by the previous step.
fc = nn.Linear(in_features=3136, out_features=10)
out = fc(out)
print('After Fully-connected : ', out.shape, sep='\n')

--------------------------------------------------
After first conv : 
torch.Size([100, 32, 28, 28])
After first pooling : 
torch.Size([100, 32, 14, 14])
--------------------------------------------------
After second conv : 
torch.Size([100, 64, 14, 14])
After second pooling : 
torch.Size([100, 64, 7, 7])
--------------------------------------------------
After flatten : 
torch.Size([100, 3136])
After Fully-connected : 
torch.Size([100, 10])


## 4. Network structure

In [76]:
# Hyperparameters
learning_rate = 1e-3  # learning rate passed to the Adam optimizer
# NOTE(review): the training cells visible in this notebook call train(10)
# with a literal, so this value looks unused — confirm before relying on it.
training_epochs = 5
batch_size = 100  # samples per mini-batch

# Network
class CNN(nn.Module):
    """Two-stage convolutional classifier producing 10 logits per sample.

    Each stage is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, stride 2).
    For a 1x28x28 input the feature map goes 32x14x14 -> 64x7x7, is
    flattened to 7*7*64 values and mapped to 10 outputs by a linear layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d(3x3, padding=1) -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor
        self.conv1 = self._conv_stage(1, 32)
        self.conv2 = self._conv_stage(32, 64)

        # Classifier head (bias included); weights use Xavier-uniform init.
        self.fc = nn.Linear(in_features=7 * 7 * 64, out_features=10)
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        """Return the logits for a batch `x` of shape (N, 1, 28, 28)."""
        features = self.conv2(self.conv1(x))
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
    
# Build the network and place its parameters on the selected device.
model = CNN().to(device=cuda0)

## 5. Optimizer and data loader

In [57]:
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

# Loss function, moved to the selected device. nn.CrossEntropyLoss expects
# raw logits (it applies log-softmax internally), so the network itself has
# no softmax layer.
criterion = nn.CrossEntropyLoss().to(cuda0)

# Adam over the parameters of the current `model`.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Pair every image tensor with its label.
ds_train = TensorDataset(x_train, y_train)
ds_test = TensorDataset(x_test, y_test)

# Mini-batch loaders: training batches are reshuffled each epoch, the test
# order stays fixed. The batch size comes from the `batch_size`
# hyperparameter instead of a second hard-coded 100.
loader_train = DataLoader(ds_train, batch_size=batch_size, shuffle=True)
loader_test = DataLoader(ds_test, batch_size=batch_size, shuffle=False)

print('total train dataset batch :', len(loader_train))

total train dataset batch : 600


## 6. Train and Test functions

In [60]:
def train(training_epochs):
    """Train the global `model` on `loader_train` and print the mean loss per epoch.

    Uses the notebook-level `model`, `optimizer`, `criterion`, `loader_train`
    and `cuda0`.

    Args:
        training_epochs: number of full passes over `loader_train`.
    """
    model.train()
    # Number of mini-batches per epoch, taken from the loader rather than a
    # hard-coded 600 so the average stays right if the batch size changes.
    num_batches = len(loader_train)

    for epoch in range(training_epochs):
        total_cost = 0.0

        for mini_batch, labels in loader_train:
            # (N, H, W) -> (N, 1, H, W): add the channel axis Conv2d expects.
            mini_batch = mini_batch.to(cuda0).unsqueeze(1)
            labels = labels.to(cuda0)

            optimizer.zero_grad()
            hypothesis = model(mini_batch)

            cost = criterion(hypothesis, labels)
            cost.backward()
            optimizer.step()

            # .item() yields a plain Python float, so the running sum does
            # not keep the autograd history of every batch alive.
            total_cost += cost.item()

        avg_cost = total_cost / num_batches if num_batches else 0.0
        print('[Epoch: {}] cost = {}'.format(epoch + 1, avg_cost))

def test():
    """Print the accuracy of the global `model` on the whole test set.

    All of `x_test` is sent through the model as a single batch on `cuda0`
    with gradient tracking disabled. The model is put in eval mode for the
    forward pass and its previous mode is restored afterwards, so
    mode-dependent layers (dropout, batch norm) would be evaluated correctly
    and later training is unaffected.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # (N, H, W) -> (N, 1, H, W): add the channel axis Conv2d expects.
            data_test = x_test.unsqueeze(1).to(cuda0)
            target_test = y_test.to(cuda0)

            correct = torch.argmax(model(data_test), dim=1) == target_test
            accuracy = correct.float().mean()
            print('Accuracy :', accuracy.item())
    finally:
        model.train(was_training)

In [63]:
%%time
# Train for 10 epochs, then print the test accuracy; %%time reports the wall time of the whole cell.
# NOTE(review): 10 is passed as a literal, so `training_epochs` from the hyperparameter cell does not drive this run.
# NOTE(review): the recorded epoch-1 cost below is already ~0.006, which suggests `model` had been
# trained by an earlier execution — confirm with Restart Kernel -> Run All.
train(10)
test()

[Epoch: 1] cost = 0.006421179510653019
[Epoch: 2] cost = 0.0071117752231657505
[Epoch: 3] cost = 0.006304878741502762
[Epoch: 4] cost = 0.005361402872949839
[Epoch: 5] cost = 0.004517941735684872
[Epoch: 6] cost = 0.004429440945386887
[Epoch: 7] cost = 0.0028939840849488974
[Epoch: 8] cost = 0.003450205083936453
[Epoch: 9] cost = 0.005184118170291185
[Epoch: 10] cost = 0.0024727066047489643
Accuracy : 0.9914000034332275
Wall time: 13.2 s


## 7. 추가실험01 : Conv Layer 늘(줄)리기

In [67]:
# < Base Model >
# --------------------------------------------------
# After first conv : 
# torch.Size([100, 32, 28, 28])
# After first pooling : 
# torch.Size([100, 32, 14, 14])
# --------------------------------------------------
# After second conv : 
# torch.Size([100, 64, 14, 14])
# After second pooling : 
# torch.Size([100, 64, 7, 7])
# --------------------------------------------------
# After flatten : 
# torch.Size([100, 3136])
# After Fully-connected : 
# torch.Size([100, 10])

In [68]:
# < Target Model01 >
# --------------------------------------------------
# After first conv : 
# torch.Size([100, 32, 28, 28])
# After first pooling : 
# torch.Size([100, 32, 14, 14])
# --------------------------------------------------
# After second conv : 
# torch.Size([100, 64, 14, 14])
# After second pooling : 
# torch.Size([100, 64, 7, 7])
# --------------------------------------------------
# After third conv : 
# torch.Size([100, 128, 7, 7])
# After third pooling : 
# torch.Size([100, 128, 3, 3])
# --------------------------------------------------
# After flatten : 
# torch.Size([100, 128*3*3])
# After Fully-connected : 
# torch.Size([100, 10])

In [70]:
# < Target Model02 >
# --------------------------------------------------
# After first conv : 
# torch.Size([100, 32, 28, 28])
# After first pooling : 
# torch.Size([100, 32, 14, 14])
# --------------------------------------------------
# After flatten : 
# torch.Size([100, 32*14*14])
# After Fully-connected : 
# torch.Size([100, 10])

In [79]:
class CNN01(nn.Module):
    """Three-stage convolutional classifier producing 10 logits per sample.

    Each stage is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, stride 2).
    For a 1x28x28 input the feature map goes 32x14x14 -> 64x7x7 -> 128x3x3,
    is flattened to 128*3*3 values and mapped to 10 outputs by a linear layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d(3x3, padding=1) -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor
        self.conv1 = self._conv_stage(1, 32)
        self.conv2 = self._conv_stage(32, 64)
        self.conv3 = self._conv_stage(64, 128)

        # Classifier head (bias included); weights use Xavier-uniform init.
        self.fc = nn.Linear(in_features=128 * 3 * 3, out_features=10)
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        """Return the logits for a batch `x` of shape (N, 1, 28, 28)."""
        features = self.conv3(self.conv2(self.conv1(x)))
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
    
# Replace the global `model` with the three-stage network on the selected device.
model = CNN01().to(device=cuda0)

In [84]:
# Fresh Adam optimizer for the current `model`. It has to be rebuilt whenever
# `model` is replaced, because an optimizer only updates the parameters it
# was constructed with.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

def train(training_epochs):
    """Train the global `model` on `loader_train` and print the mean loss per epoch.

    Uses the notebook-level `model`, `optimizer`, `criterion`, `loader_train`
    and `cuda0`.

    Args:
        training_epochs: number of full passes over `loader_train`.
    """
    model.train()
    # Number of mini-batches per epoch, taken from the loader rather than a
    # hard-coded 600 so the average stays right if the batch size changes.
    num_batches = len(loader_train)

    for epoch in range(training_epochs):
        total_cost = 0.0

        for mini_batch, labels in loader_train:
            # (N, H, W) -> (N, 1, H, W): add the channel axis Conv2d expects.
            mini_batch = mini_batch.to(cuda0).unsqueeze(1)
            labels = labels.to(cuda0)

            optimizer.zero_grad()
            hypothesis = model(mini_batch)

            cost = criterion(hypothesis, labels)
            cost.backward()
            optimizer.step()

            # .item() yields a plain Python float, so the running sum does
            # not keep the autograd history of every batch alive.
            total_cost += cost.item()

        avg_cost = total_cost / num_batches if num_batches else 0.0
        print('[Epoch: {}] cost = {}'.format(epoch + 1, avg_cost))

def test():
    """Print the accuracy of the global `model` on the whole test set.

    All of `x_test` is sent through the model as a single batch on `cuda0`
    with gradient tracking disabled. The model is put in eval mode for the
    forward pass and its previous mode is restored afterwards, so
    mode-dependent layers (dropout, batch norm) would be evaluated correctly
    and later training is unaffected.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # (N, H, W) -> (N, 1, H, W): add the channel axis Conv2d expects.
            data_test = x_test.unsqueeze(1).to(cuda0)
            target_test = y_test.to(cuda0)

            correct = torch.argmax(model(data_test), dim=1) == target_test
            accuracy = correct.float().mean()
            print('Accuracy :', accuracy.item())
    finally:
        model.train(was_training)

In [85]:
%%time
# Train the current `model` (the CNN01 instance created above) for 10 epochs, then print the test
# accuracy; %%time reports the wall time of the whole cell.
train(10)
test()

[Epoch: 1] cost = 0.2811686098575592
[Epoch: 2] cost = 0.09663382172584534
[Epoch: 3] cost = 0.06970563530921936
[Epoch: 4] cost = 0.05798151716589928
[Epoch: 5] cost = 0.04972846060991287
[Epoch: 6] cost = 0.043782610446214676
[Epoch: 7] cost = 0.0385272391140461
[Epoch: 8] cost = 0.03475702181458473
[Epoch: 9] cost = 0.03151321783661842
[Epoch: 10] cost = 0.02829497493803501
Accuracy : 0.9835000038146973
Wall time: 9.79 s


In [87]:
class CNN02(nn.Module):
    """Single-stage convolutional classifier producing 10 logits per sample.

    The only stage is Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, stride 2).
    For a 1x28x28 input it yields a 32x14x14 feature map, which is flattened
    to 32*14*14 values and mapped to 10 outputs by a linear layer.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor
        stage = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.conv1 = nn.Sequential(*stage)

        # Classifier head (bias included); weights use Xavier-uniform init.
        self.fc = nn.Linear(in_features=32 * 14 * 14, out_features=10)
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        """Return the logits for a batch `x` of shape (N, 1, 28, 28)."""
        features = self.conv1(x)
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
    
# Replace the global `model` with the single-stage network on the selected device.
model = CNN02().to(device=cuda0)

In [88]:
# Fresh Adam optimizer for the current `model`. It has to be rebuilt whenever
# `model` is replaced, because an optimizer only updates the parameters it
# was constructed with.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

def train(training_epochs):
    """Train the global `model` on `loader_train` and print the mean loss per epoch.

    Uses the notebook-level `model`, `optimizer`, `criterion`, `loader_train`
    and `cuda0`.

    Args:
        training_epochs: number of full passes over `loader_train`.
    """
    model.train()
    # Number of mini-batches per epoch, taken from the loader rather than a
    # hard-coded 600 so the average stays right if the batch size changes.
    num_batches = len(loader_train)

    for epoch in range(training_epochs):
        total_cost = 0.0

        for mini_batch, labels in loader_train:
            # (N, H, W) -> (N, 1, H, W): add the channel axis Conv2d expects.
            mini_batch = mini_batch.to(cuda0).unsqueeze(1)
            labels = labels.to(cuda0)

            optimizer.zero_grad()
            hypothesis = model(mini_batch)

            cost = criterion(hypothesis, labels)
            cost.backward()
            optimizer.step()

            # .item() yields a plain Python float, so the running sum does
            # not keep the autograd history of every batch alive.
            total_cost += cost.item()

        avg_cost = total_cost / num_batches if num_batches else 0.0
        print('[Epoch: {}] cost = {}'.format(epoch + 1, avg_cost))

def test():
    """Print the accuracy of the global `model` on the whole test set.

    All of `x_test` is sent through the model as a single batch on `cuda0`
    with gradient tracking disabled. The model is put in eval mode for the
    forward pass and its previous mode is restored afterwards, so
    mode-dependent layers (dropout, batch norm) would be evaluated correctly
    and later training is unaffected.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # (N, H, W) -> (N, 1, H, W): add the channel axis Conv2d expects.
            data_test = x_test.unsqueeze(1).to(cuda0)
            target_test = y_test.to(cuda0)

            correct = torch.argmax(model(data_test), dim=1) == target_test
            accuracy = correct.float().mean()
            print('Accuracy :', accuracy.item())
    finally:
        model.train(was_training)

In [89]:
%%time
# Train the current `model` (the CNN02 instance created above) for 10 epochs, then print the test
# accuracy; %%time reports the wall time of the whole cell.
train(10)
test()

[Epoch: 1] cost = 0.29346925020217896
[Epoch: 2] cost = 0.10390042513608932
[Epoch: 3] cost = 0.07248079776763916
[Epoch: 4] cost = 0.059315357357263565
[Epoch: 5] cost = 0.05086713656783104
[Epoch: 6] cost = 0.04484635218977928
[Epoch: 7] cost = 0.03970688581466675
[Epoch: 8] cost = 0.035795196890830994
[Epoch: 9] cost = 0.03148060292005539
[Epoch: 10] cost = 0.02825966849923134
Accuracy : 0.9842999577522278
Wall time: 9.77 s


## 추가실험 보류