# MLP : Digit Recognition

## 1. Module and Dataset

In [2]:
# TensorFlow is imported solely as a convenient MNIST downloader.
import tensorflow as tf

# Fetch the train/test splits (NumPy arrays, see the %whos listing below).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Scale the pixel values by 1/255 (assumes 8-bit intensities, giving a [0, 1] range).
x_train = x_train / 255.0
x_test = x_test / 255.0

# TensorFlow is no longer needed once the arrays are loaded; drop the name.
del tf

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [3]:
%whos

Variable   Type       Data/Info
-------------------------------
x_test     ndarray    10000x28x28: 7840000 elems, type `float64`, 62720000 bytes (59.814453125 Mb)
x_train    ndarray    60000x28x28: 47040000 elems, type `float64`, 376320000 bytes (358.88671875 Mb)
y_test     ndarray    10000: 10000 elems, type `uint8`, 10000 bytes
y_train    ndarray    60000: 60000 elems, type `uint8`, 60000 bytes


In [4]:
import torch

# Device selection: use the first CUDA GPU when one is available, else the CPU.
# NOTE: the variable is named `cuda0` even when it ends up referring to the CPU.
cuda0 = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [5]:
# Convert the NumPy arrays to PyTorch tensors with explicit dtypes.
# torch.as_tensor replaces the legacy torch.Tensor / torch.LongTensor
# constructors: the float64 -> float32 downcast of the images is now visible,
# and re-running this cell on already-converted tensors is a harmless no-op.

# Training dataset: float32 images, int64 class labels (as CrossEntropyLoss expects)
x_train = torch.as_tensor(x_train, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.long)

# Test dataset: same dtypes as the training split
x_test = torch.as_tensor(x_test, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.long)

In [6]:
# Sanity check: report the tensor shapes of both splits after conversion.
print('<   Train dataset   >')
print('Shape of x_train : ', x_train.size())
print('Shape of y_train : ', y_train.size())

print('<   Test dataset   >')
print('Shape of x_test : ', x_test.size())
print('Shape of y_test : ', y_test.size())

<   Train dataset   >
Shape of x_train :  torch.Size([60000, 28, 28])
Shape of y_train :  torch.Size([60000])
<   Test dataset   >
Shape of x_test :  torch.Size([10000, 28, 28])
Shape of y_test :  torch.Size([10000])


## 2. MLP Model
1. Model01

In [7]:
from torch import nn

def build_mlp(hidden_sizes, activation, act_prefix, n_inputs=28*28, n_classes=10):
    """Build a fully connected classifier as a named ``nn.Sequential``.

    Linear layers are named ``fc1``, ``fc2``, ... and the activation following
    the i-th hidden layer is named ``<act_prefix><i>`` (e.g. ``sig1``,
    ``relu2``). The final linear layer maps to ``n_classes`` and has no
    activation; ``nn.CrossEntropyLoss`` consumes its raw outputs.

    Args:
        hidden_sizes: sequence with the width of each hidden layer, in order.
        activation: zero-argument callable returning an activation module
            (e.g. ``nn.Sigmoid`` or ``nn.ReLU``).
        act_prefix: name prefix for the activation modules.
        n_inputs: size of the flattened input vector (28*28 pixels by default).
        n_classes: number of output units (10 digits by default).

    Returns:
        The assembled model, already moved to the notebook-level device ``cuda0``.
    """
    net = nn.Sequential()
    in_features = n_inputs
    for idx, width in enumerate(hidden_sizes, start=1):
        net.add_module('fc{}'.format(idx), nn.Linear(in_features, width))
        net.add_module('{}{}'.format(act_prefix, idx), activation())
        in_features = width

    # Output layer: last hidden layer -> class scores
    net.add_module('fc{}'.format(len(hidden_sizes) + 1), nn.Linear(in_features, n_classes))

    # GPU Setting
    return net.to(cuda0)


## Model 01: one wide hidden layer (784 -> 2352 -> 10), Sigmoid
model01 = build_mlp([28*28*3], nn.Sigmoid, 'sig')

## Model 02: three hidden layers (784 -> 784 -> 784 -> 784 -> 10), Sigmoid
model02 = build_mlp([28*28, 28*28, 28*28], nn.Sigmoid, 'sig')

## Model 03: one wide hidden layer (784 -> 2352 -> 10), ReLU
model03 = build_mlp([28*28*3], nn.ReLU, 'relu')

## Model 04: three hidden layers (784 -> 784 -> 784 -> 784 -> 10), ReLU
model04 = build_mlp([28*28, 28*28, 28*28], nn.ReLU, 'relu')

# Show the four architectures side by side
print('------------------------------------------')
print('< Model01 >')
print(model01)
print('------------------------------------------')
print('< Model02 >')
print(model02)
print('------------------------------------------')
print('< Model03 >')
print(model03)
print('------------------------------------------')
print('< Model04 >')
print(model04)
print('------------------------------------------')

------------------------------------------
< Model01 >
Sequential(
  (fc1): Linear(in_features=784, out_features=2352, bias=True)
  (sig1): Sigmoid()
  (fc2): Linear(in_features=2352, out_features=10, bias=True)
)
------------------------------------------
< Model02 >
Sequential(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (sig1): Sigmoid()
  (fc2): Linear(in_features=784, out_features=784, bias=True)
  (sig2): Sigmoid()
  (fc3): Linear(in_features=784, out_features=784, bias=True)
  (sig3): Sigmoid()
  (fc4): Linear(in_features=784, out_features=10, bias=True)
)
------------------------------------------
< Model03 >
Sequential(
  (fc1): Linear(in_features=784, out_features=2352, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=2352, out_features=10, bias=True)
)
------------------------------------------
< Model04 >
Sequential(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=784, out_features=784, 

## 3. MLP Optimizer

In [8]:
from torch import optim

# Classification loss shared by all models, moved to the training device
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(cuda0)

# One independent Adam optimizer per model, all with learning rate 0.005
optimizer01, optimizer02, optimizer03, optimizer04 = (
    optim.Adam(net.parameters(), lr=0.005)
    for net in (model01, model02, model03, model04)
)

## 4. Data loader

In [9]:
from torch.utils.data import TensorDataset, DataLoader

# Pair each image tensor with its label tensor
ds_train, ds_test = TensorDataset(x_train, y_train), TensorDataset(x_test, y_test)

# Online learning: batch_size=1, i.e. one sample per optimisation step.
# Training samples are shuffled; the test set keeps its original order.
loader_train = DataLoader(dataset=ds_train, batch_size=1, shuffle=True)
loader_test = DataLoader(dataset=ds_test, batch_size=1, shuffle=False)

## 4. Train and Test functions

In [10]:
def train(model, optimizer, epoch):
    """Run one pass over ``loader_train``, updating ``model`` in place.

    Relies on the notebook-level globals ``loader_train``, ``loss_fn`` and
    ``cuda0``.

    Args:
        model: network to train; it is called with flattened inputs of shape
            ``(batch, features)``.
        optimizer: optimizer whose ``zero_grad``/``step`` are invoked once per
            mini-batch (expected to manage ``model``'s parameters).
        epoch: index of the current epoch. Currently unused; kept so that
            existing callers continue to work.
    """
    # Convert to train mode
    model.train()

    # Get a mini-batch from data loader and execute
    for data, targets in loader_train:
        # Flatten every image into a vector. The batch dimension is taken from
        # the data instead of being hard-coded to 1, so the function also works
        # when the loader is configured with a larger batch size.
        data = data.reshape(data.size(0), -1).to(cuda0)
        targets = targets.to(cuda0)

        optimizer.zero_grad()  # reset gradients accumulated by the previous step
        outputs = model(data)  # forward pass on the flattened batch
        loss = loss_fn(outputs, targets)  # error between scores and labels
        loss.backward()  # back-propagation
        optimizer.step()  # weights update

In [11]:
def test(model):
    """Evaluate ``model`` on ``loader_test`` and print its accuracy.

    Relies on the notebook-level globals ``loader_test`` and ``cuda0``.
    Prints ``Test Accuracy: <correct>/<total> (<percent>%)`` and returns
    nothing.

    Args:
        model: network to evaluate; it is called with flattened inputs of
            shape ``(batch, features)``.
    """
    # Convert to evaluation mode
    model.eval()
    # Number of correctly classified samples, kept as a plain Python int so the
    # percentage below is ordinary float arithmetic rather than tensor math.
    correct = 0

    # In prediction, we don't need to have gradients.
    with torch.no_grad():
        # Get a mini-batch from data loader and execute
        for data, targets in loader_test:
            # Flatten every image; the batch size is inferred from the data
            # rather than hard-coded to 1.
            data = data.reshape(data.size(0), -1).to(cuda0)
            targets = targets.to(cuda0)

            outputs = model(data)  # forward pass on the flattened batch
            _, predicted = torch.max(outputs, 1)  # index of the highest score per sample
            correct += (predicted == targets.view_as(predicted)).sum().item()

    # print accuracy
    data_num = len(loader_test.dataset)  # Number of data
    print('Test Accuracy: {}/{} ({:.0f}%)\n'.format(correct,
                                                   data_num, 100. * correct / data_num))

## 5. Models Training and Test

In [12]:
def getResult(model, optimizer, maxEpoch):
    """Print ``model``, then alternate training and evaluation.

    Epochs are numbered ``0`` through ``maxEpoch`` inclusive, so
    ``maxEpoch + 1`` training passes are executed in total. After each pass
    the test accuracy is printed by ``test``.

    Args:
        model: network to train and evaluate.
        optimizer: optimizer forwarded to ``train``.
        maxEpoch: index of the last epoch to run.
    """
    separator = '------------------------------------------'
    print(separator, model, separator, sep='\n')

    for epoch_idx in range(0, maxEpoch + 1):
        print(epoch_idx, 'epoch : ')
        train(model, optimizer, epoch_idx)
        test(model)

In [12]:
%%time
# Train/evaluate model01 (one 2352-unit Sigmoid hidden layer) for epochs 0..10.
getResult(model01, optimizer01, 10)

------------------------------------------
Sequential(
  (fc1): Linear(in_features=784, out_features=2352, bias=True)
  (sig1): Sigmoid()
  (fc2): Linear(in_features=2352, out_features=10, bias=True)
)
------------------------------------------
0 epoch : 
Test Accuracy: 9379/10000 (94%)

1 epoch : 
Test Accuracy: 9146/10000 (91%)

2 epoch : 
Test Accuracy: 9505/10000 (95%)

3 epoch : 
Test Accuracy: 9535/10000 (95%)

4 epoch : 
Test Accuracy: 9627/10000 (96%)

5 epoch : 
Test Accuracy: 9529/10000 (95%)

6 epoch : 
Test Accuracy: 9528/10000 (95%)

7 epoch : 
Test Accuracy: 9596/10000 (96%)

8 epoch : 
Test Accuracy: 9579/10000 (96%)

9 epoch : 
Test Accuracy: 9657/10000 (97%)

10 epoch : 
Test Accuracy: 9635/10000 (96%)

Wall time: 13min 57s


In [24]:
%%time
# Train/evaluate model02 (three 784-unit Sigmoid hidden layers) for epochs 0..10.
# NOTE(review): the recorded output stays at 982/10000 in every epoch, i.e. the
# network did not learn with these settings -- cause not established here
# (possibly the learning rate / batch size of 1); worth investigating.
getResult(model02, optimizer02, 10)

------------------------------------------
Sequential(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (sig1): Sigmoid()
  (fc2): Linear(in_features=784, out_features=784, bias=True)
  (sig2): Sigmoid()
  (fc3): Linear(in_features=784, out_features=784, bias=True)
  (sig3): Sigmoid()
  (fc4): Linear(in_features=784, out_features=10, bias=True)
)
------------------------------------------
0 epoch : 
Test Accuracy: 982/10000 (10%)

1 epoch : 
Test Accuracy: 982/10000 (10%)

2 epoch : 
Test Accuracy: 982/10000 (10%)

3 epoch : 
Test Accuracy: 982/10000 (10%)

4 epoch : 
Test Accuracy: 982/10000 (10%)

5 epoch : 
Test Accuracy: 982/10000 (10%)

6 epoch : 
Test Accuracy: 982/10000 (10%)

7 epoch : 
Test Accuracy: 982/10000 (10%)

8 epoch : 
Test Accuracy: 982/10000 (10%)

9 epoch : 
Test Accuracy: 982/10000 (10%)

10 epoch : 
Test Accuracy: 982/10000 (10%)

Wall time: 10min 27s


In [21]:
%%time
# Train/evaluate model03 (one 2352-unit ReLU hidden layer) for epochs 0..10.
getResult(model03, optimizer03, 10)

------------------------------------------
Sequential(
  (fc1): Linear(in_features=784, out_features=2352, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=2352, out_features=10, bias=True)
)
------------------------------------------
0 epoch : 
Test Accuracy: 9279/10000 (93%)

1 epoch : 
Test Accuracy: 9419/10000 (94%)

2 epoch : 
Test Accuracy: 9373/10000 (94%)

3 epoch : 
Test Accuracy: 9430/10000 (94%)

4 epoch : 
Test Accuracy: 9455/10000 (95%)

5 epoch : 
Test Accuracy: 9383/10000 (94%)

6 epoch : 
Test Accuracy: 9491/10000 (95%)

7 epoch : 
Test Accuracy: 9540/10000 (95%)

8 epoch : 
Test Accuracy: 9559/10000 (96%)

9 epoch : 
Test Accuracy: 9532/10000 (95%)

10 epoch : 
Test Accuracy: 9541/10000 (95%)

Wall time: 13min 55s


In [25]:
%%time
# Train/evaluate model04 (three 784-unit ReLU hidden layers) for epochs 0..10.
# NOTE(review): the recorded output stays at 1136/10000 in every epoch, i.e. the
# network did not learn with these settings -- cause not established here;
# worth investigating.
getResult(model04, optimizer04, 10)

------------------------------------------
Sequential(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=784, out_features=784, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=784, out_features=784, bias=True)
  (relu3): ReLU()
  (fc4): Linear(in_features=784, out_features=10, bias=True)
)
------------------------------------------
0 epoch : 
Test Accuracy: 1136/10000 (11%)

1 epoch : 
Test Accuracy: 1136/10000 (11%)

2 epoch : 
Test Accuracy: 1136/10000 (11%)

3 epoch : 
Test Accuracy: 1136/10000 (11%)

4 epoch : 
Test Accuracy: 1136/10000 (11%)

5 epoch : 
Test Accuracy: 1136/10000 (11%)

6 epoch : 
Test Accuracy: 1136/10000 (11%)

7 epoch : 
Test Accuracy: 1136/10000 (11%)

8 epoch : 
Test Accuracy: 1136/10000 (11%)

9 epoch : 
Test Accuracy: 1136/10000 (11%)

10 epoch : 
Test Accuracy: 1136/10000 (11%)

Wall time: 10min 32s


## 6. 추가 실험01 : Hidden layer 수 조정

In [13]:
## Experiment 01: vary the number of hidden layers (width fixed at 28*28, ReLU).
# NOTE: the activation modules keep their 'sigN' names although they are ReLU
# layers; the names are left as-is because they appear in the printed output.

## Model 01 -- two hidden layers
model01 = nn.Sequential()
for layer_name, layer in (
    ('fc1', nn.Linear(28*28*1, 28*28)),   # input -> h1
    ('sig1', nn.ReLU()),
    ('fc2', nn.Linear(28*28*1, 28*28)),   # h1 -> h2
    ('sig2', nn.ReLU()),
    ('fc3', nn.Linear(28*28, 10)),        # h2 -> output
):
    model01.add_module(layer_name, layer)

# GPU Setting
model01.to(cuda0)

## Model 02 -- four hidden layers
model02 = nn.Sequential()
for layer_name, layer in (
    ('fc1', nn.Linear(28*28*1, 28*28)),   # input -> h1
    ('sig1', nn.ReLU()),
    ('fc2', nn.Linear(28*28*1, 28*28)),   # h1 -> h2
    ('sig2', nn.ReLU()),
    ('fc3', nn.Linear(28*28*1, 28*28)),   # h2 -> h3
    ('sig3', nn.ReLU()),
    ('fc4', nn.Linear(28*28*1, 28*28)),   # h3 -> h4
    ('sig4', nn.ReLU()),
    ('fc5', nn.Linear(28*28, 10)),        # h4 -> output
):
    model02.add_module(layer_name, layer)

# GPU Setting
model02.to(cuda0)

# Loop helpers are not needed outside this cell
del layer_name, layer

# Show both architectures
print(model01)
print(model02)

# Fresh Adam optimizers bound to the newly created models
optimizer01, optimizer02 = (
    optim.Adam(net.parameters(), lr=0.005) for net in (model01, model02)
)

Sequential(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (sig1): ReLU()
  (fc2): Linear(in_features=784, out_features=784, bias=True)
  (sig2): ReLU()
  (fc3): Linear(in_features=784, out_features=10, bias=True)
)
Sequential(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (sig1): ReLU()
  (fc2): Linear(in_features=784, out_features=784, bias=True)
  (sig2): ReLU()
  (fc3): Linear(in_features=784, out_features=784, bias=True)
  (sig3): ReLU()
  (fc4): Linear(in_features=784, out_features=784, bias=True)
  (sig4): ReLU()
  (fc5): Linear(in_features=784, out_features=10, bias=True)
)


In [14]:
%%time
# Train/evaluate the two-hidden-layer ReLU model for epochs 0..10.
# NOTE(review): recorded accuracy falls from 85% (epoch 0) to 45% (epoch 10),
# so training appears unstable with these settings -- cause not established here.
getResult(model01, optimizer01, 10)

------------------------------------------
Sequential(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (sig1): ReLU()
  (fc2): Linear(in_features=784, out_features=784, bias=True)
  (sig2): ReLU()
  (fc3): Linear(in_features=784, out_features=10, bias=True)
)
------------------------------------------
0 epoch : 
Test Accuracy: 8477/10000 (85%)

1 epoch : 
Test Accuracy: 8564/10000 (86%)

2 epoch : 
Test Accuracy: 8310/10000 (83%)

3 epoch : 
Test Accuracy: 6528/10000 (65%)

4 epoch : 
Test Accuracy: 7208/10000 (72%)

5 epoch : 
Test Accuracy: 6343/10000 (63%)

6 epoch : 
Test Accuracy: 5699/10000 (57%)

7 epoch : 
Test Accuracy: 4617/10000 (46%)

8 epoch : 
Test Accuracy: 4829/10000 (48%)

9 epoch : 
Test Accuracy: 5459/10000 (55%)

10 epoch : 
Test Accuracy: 4533/10000 (45%)

Wall time: 13min 38s


In [19]:
%%time
# Train/evaluate the four-hidden-layer ReLU model for epochs 0..10.
# NOTE(review): recorded accuracy fluctuates between 59% and 82% across epochs.
getResult(model02, optimizer02, 10)

------------------------------------------
Sequential(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (sig1): ReLU()
  (fc2): Linear(in_features=784, out_features=784, bias=True)
  (sig2): ReLU()
  (fc3): Linear(in_features=784, out_features=784, bias=True)
  (sig3): ReLU()
  (fc4): Linear(in_features=784, out_features=784, bias=True)
  (sig4): ReLU()
  (fc5): Linear(in_features=784, out_features=10, bias=True)
)
------------------------------------------
0 epoch : 
Test Accuracy: 7451/10000 (75%)

1 epoch : 
Test Accuracy: 7010/10000 (70%)

2 epoch : 
Test Accuracy: 6325/10000 (63%)

3 epoch : 
Test Accuracy: 7162/10000 (72%)

4 epoch : 
Test Accuracy: 6928/10000 (69%)

5 epoch : 
Test Accuracy: 7650/10000 (76%)

6 epoch : 
Test Accuracy: 6697/10000 (67%)

7 epoch : 
Test Accuracy: 7867/10000 (79%)

8 epoch : 
Test Accuracy: 8183/10000 (82%)

9 epoch : 
Test Accuracy: 5888/10000 (59%)

10 epoch : 
Test Accuracy: 8243/10000 (82%)

Wall time: 20min 9s


## 7. 추가 실험02 : Hidden node 수 조정

In [13]:
## Experiment 02: vary the width of a single ReLU hidden layer.

## Model03 -- hidden width 28*28*2
model03 = nn.Sequential()
for layer_name, layer in (
    ('fc1', nn.Linear(28*28*1, 28*28*2)),   # input -> h1
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(28*28*2, 10)),        # h1 -> output
):
    model03.add_module(layer_name, layer)

# GPU Setting
model03.to(cuda0)

## Model04 -- hidden width 28*28*4
model04 = nn.Sequential()
for layer_name, layer in (
    ('fc1', nn.Linear(28*28*1, 28*28*4)),   # input -> h1
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(28*28*4, 10)),        # h1 -> output
):
    model04.add_module(layer_name, layer)

# GPU Setting
model04.to(cuda0)

# Loop helpers are not needed outside this cell
del layer_name, layer

# Show both architectures
print(model03)
print(model04)

# Fresh Adam optimizers bound to the newly created models
optimizer03, optimizer04 = (
    optim.Adam(net.parameters(), lr=0.005) for net in (model03, model04)
)

Sequential(
  (fc1): Linear(in_features=784, out_features=1568, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=1568, out_features=10, bias=True)
)
Sequential(
  (fc1): Linear(in_features=784, out_features=3136, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=3136, out_features=10, bias=True)
)


In [14]:
%%time
# Train/evaluate the 1568-unit single-hidden-layer ReLU model for epochs 0..10.
getResult(model03, optimizer03, 10)

------------------------------------------
Sequential(
  (fc1): Linear(in_features=784, out_features=1568, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=1568, out_features=10, bias=True)
)
------------------------------------------
0 epoch : 
Test Accuracy: 9282/10000 (93%)

1 epoch : 
Test Accuracy: 9182/10000 (92%)

2 epoch : 
Test Accuracy: 9234/10000 (92%)

3 epoch : 
Test Accuracy: 9330/10000 (93%)

4 epoch : 
Test Accuracy: 9370/10000 (94%)

5 epoch : 
Test Accuracy: 9377/10000 (94%)

6 epoch : 
Test Accuracy: 9412/10000 (94%)

7 epoch : 
Test Accuracy: 9384/10000 (94%)

8 epoch : 
Test Accuracy: 9376/10000 (94%)

9 epoch : 
Test Accuracy: 9448/10000 (94%)

10 epoch : 
Test Accuracy: 9458/10000 (95%)

Wall time: 11min 43s


In [15]:
%%time
# Train/evaluate the 3136-unit single-hidden-layer ReLU model for epochs 0..10.
getResult(model04, optimizer04, 10)

------------------------------------------
Sequential(
  (fc1): Linear(in_features=784, out_features=3136, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=3136, out_features=10, bias=True)
)
------------------------------------------
0 epoch : 
Test Accuracy: 9266/10000 (93%)

1 epoch : 
Test Accuracy: 9324/10000 (93%)

2 epoch : 
Test Accuracy: 9436/10000 (94%)

3 epoch : 
Test Accuracy: 9462/10000 (95%)

4 epoch : 
Test Accuracy: 9506/10000 (95%)

5 epoch : 
Test Accuracy: 9470/10000 (95%)

6 epoch : 
Test Accuracy: 9537/10000 (95%)

7 epoch : 
Test Accuracy: 9532/10000 (95%)

8 epoch : 
Test Accuracy: 9506/10000 (95%)

9 epoch : 
Test Accuracy: 9541/10000 (95%)

10 epoch : 
Test Accuracy: 9552/10000 (96%)

Wall time: 15min 20s


## 추가실험 보류