# Khai báo thư viện và lấy data từ MNIST

In [1]:
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
# Training split of MNIST as tensors; download=True fetches the raw archives
# into ./data when they are not already present.
train_data = datasets.MNIST(root='data', train=True, transform=ToTensor(), download=True)

# Held-out test split. No download flag is passed here: the call above has
# already placed the t10k-* files under the same root.
test_data = datasets.MNIST(root='data', train=False, transform=ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [3]:
# Sanity check: raw training tensor shape, then the test dataset summary.
print(train_data.data.shape)
print(test_data)

torch.Size([60000, 28, 28])
Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()


In [4]:
from torch.utils.data import DataLoader
# DataLoader options used below:
#   shuffle     - reshuffle the samples at the start of every epoch so that
#                 mini-batches differ between epochs (training only).
#   num_workers - number of subprocesses used to load data;
#                 0 means the data is loaded in the main process.
loaders = {
    'train': DataLoader(train_data,
                        batch_size=128,
                        shuffle=True,
                        num_workers=1),

    # Evaluation does not benefit from shuffling; a fixed order keeps the
    # batch composition (and therefore any per-batch metric) reproducible.
    'test': DataLoader(test_data,
                       batch_size=128,
                       shuffle=False,
                       num_workers=1),
}
loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x7f8eefa19750>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x7f8eefa30190>}

# Build model

In [5]:
import torch.nn as nn
class CNN(nn.Module):
    """Small two-stage convolutional classifier with a 10-way linear head.

    Each stage is Conv2d(3x3, no padding) -> ReLU -> MaxPool2d(2). The linear
    head expects 64 * 5 * 5 = 1600 flattened features, which is what a
    1 x 28 x 28 input yields (28 -> 26 -> 13 -> 11 -> 5 per spatial side).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 32 channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fully connected layer producing one logit per class (10 classes).
        self.out = nn.Linear(64 * 5 * 5, 10)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return ``(logits, features)`` for a batch ``x``.

        ``features`` is the flattened conv2 output after dropout, of shape
        (batch_size, 64 * 5 * 5); it is returned alongside the logits so it
        can be used for visualization.
        """
        feature_maps = self.conv2(self.conv1(x))

        # Collapse channel and spatial dims: (batch_size, 64 * 5 * 5).
        batch_size = feature_maps.size(0)
        flat = self.dropout(feature_maps.view(batch_size, -1))

        logits = self.out(flat)
        return logits, flat

In [6]:
cnn = CNN()
print(cnn)

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1600, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


# Train model

In [8]:
from torch.autograd import Variable
from torch import optim

# Training configuration shared by the cells below.
loss_func = nn.CrossEntropyLoss()   # applied to the raw class logits
num_epochs = 10                     # full passes over the training loader
optimizer = optim.Adam(cnn.parameters(), lr=0.01)

def train(num_epochs, cnn, loaders, opt=None, criterion=None):
    """Train ``cnn`` on ``loaders['train']`` for ``num_epochs`` epochs.

    Args:
        num_epochs: number of full passes over the training loader.
        cnn: model whose forward pass returns a sequence with the class
            logits at index 0.
        loaders: mapping with a ``'train'`` entry that supports ``len()`` and
            yields ``(images, labels)`` batches.
        opt: optimizer to step. Defaults to the notebook-level ``optimizer``.
            Pass one explicitly when ``cnn`` is not the model that the
            notebook-level optimizer was built for; otherwise the wrong
            parameters would be updated.
        criterion: loss callable taking ``(logits, labels)``. Defaults to the
            notebook-level ``loss_func``.

    Returns:
        None. The loss is printed every 100 steps.
    """
    # Fall back to the objects defined in the notebook when none are given.
    opt = optimizer if opt is None else opt
    criterion = loss_func if criterion is None else criterion

    # Put the model in training mode (enables dropout).
    cnn.train()

    total_step = len(loaders['train'])

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            # Tensors are passed straight to the model; the deprecated
            # autograd.Variable wrapper is a no-op and is not needed.
            output = cnn(images)[0]
            loss = criterion(output, labels)

            # Clear gradients left over from the previous step.
            opt.zero_grad()
            # Backpropagation: compute gradients of the loss.
            loss.backward()
            # Apply the gradients.
            opt.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
# Run the training loop on the notebook-level model and data loaders.
train(num_epochs, cnn, loaders)

Epoch [1/10], Step [100/469], Loss: 0.3715
Epoch [1/10], Step [200/469], Loss: 0.3734
Epoch [1/10], Step [300/469], Loss: 0.3126
Epoch [1/10], Step [400/469], Loss: 0.3897
Epoch [2/10], Step [100/469], Loss: 0.2347
Epoch [2/10], Step [200/469], Loss: 0.2014
Epoch [2/10], Step [300/469], Loss: 0.1125
Epoch [2/10], Step [400/469], Loss: 0.2132
Epoch [3/10], Step [100/469], Loss: 0.1099
Epoch [3/10], Step [200/469], Loss: 0.1278
Epoch [3/10], Step [300/469], Loss: 0.1342
Epoch [3/10], Step [400/469], Loss: 0.1198
Epoch [4/10], Step [100/469], Loss: 0.1346
Epoch [4/10], Step [200/469], Loss: 0.1048
Epoch [4/10], Step [300/469], Loss: 0.1671
Epoch [4/10], Step [400/469], Loss: 0.0792
Epoch [5/10], Step [100/469], Loss: 0.1639
Epoch [5/10], Step [200/469], Loss: 0.0542
Epoch [5/10], Step [300/469], Loss: 0.0962
Epoch [5/10], Step [400/469], Loss: 0.1032
Epoch [6/10], Step [100/469], Loss: 0.2379
Epoch [6/10], Step [200/469], Loss: 0.2648
Epoch [6/10], Step [300/469], Loss: 0.1010
Epoch [6/10

# Evaluate model

In [9]:
def test():
    # Test the model
    cnn.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loaders['test']:
            test_output, last_layer = cnn(images)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == labels).sum().item() / float(labels.size(0))
            correct += accuracy
            total += 1
            pass
        print(correct/total)
    
        pass
# Evaluate the notebook-level model on the test loader and print its accuracy.
test()

0.9861550632911392
