In [1]:
import torch
from torch import nn, optim
from torch.autograd import Variable
import numpy as np
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.utils.data as Data

In [2]:
#MNIST data
mnist_trainset = datasets.MNIST(root = './data',train =True,transform=transforms.ToTensor(),download=True)
mnist_testset = datasets.MNIST(root = './data', train = False, transform=transforms.ToTensor(),download=True)


#parameters
lr = 0.001
epochs = 15
batch_size = 20

#data
traindata = Data.DataLoader(dataset=mnist_trainset, batch_size=batch_size, shuffle=True)

In [11]:
class CNNClass(nn.Module):
    
    def __init__(self):
        super(CNNClass, self).__init__()
        self._build_net()
        
    def _build_net(self):
        self.keep_prob = 0.7
        #Image shape = (-1, 28, 28, 1)
        #Conv => (-1, 28, 28, 32)
        #output shape => (-1, 14, 14, 32)
        self.layer1 = nn.Sequential(nn.Conv2d(1, 32,kernel_size = 3, stride= 1, padding = 1),
                                      nn.ReLU(), nn.MaxPool2d(kernel_size = 2, stride = 2),
                                       nn.Dropout(p = 1 - self.keep_prob))
        
        #Conv => (-1, 14, 14, 64)
        #Pool => (-1, 7, 7, 64)
        self.layer2 = nn.Sequential(nn.Conv2d(32, 64, kernel_size = 3, stride = 1, padding = 1),
                                     nn.ReLU(), nn.MaxPool2d(kernel_size = 2, stride = 2),
                                       nn.Dropout(p = 1 - self.keep_prob))
        
        #Conv => (-1, 7, 7, 128)
        #Pool => (-1, 4, 4, 128)
        self.layer3 = nn.Sequential(nn.Conv2d(64, 128, kernel_size = 3, stride = 1, padding = 1),
                                     nn.ReLU(), nn.MaxPool2d(kernel_size = 2, stride = 2, padding = 1),
                                       nn.Dropout(p = 1 - self.keep_prob))
        
        #L4:  (4*4*128) inputs => (625) outputs
        self.fc1 = nn.Linear(4*4*128, 625, bias = True)
        nn.init.xavier_uniform_(self.fc1.weight)
        self.layer4 = nn.Sequential(self.fc1, nn.ReLU(),
                                   nn.Dropout(p = 1- self.keep_prob))
        #L5  (625) inputs => 10 outputs
        self.fc2 = nn.Linear(625, 10, bias = True)
        nn.init.xavier_uniform_(self.fc2.weight)
        
        #define cost & optimizer
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.parameters(), lr = lr)
        
    def forward(self, data):
        outputs = self.layer1(data)
        outputs = self.layer2(outputs)
        outputs = self.layer3(outputs)
        outputs = outputs.view(outputs.size(0), -1)
        outputs = self.layer4(outputs)
        outputs = self.fc2(outputs)
        return outputs
    
    def train_(self, data, label):
        self.train()  #sets the modelu in training model
        self.optimizer.zero_grad()
        outputs = self.forward(data)
        self.cost = self.criterion(outputs, label)
        self.cost.backward()
        self.optimizer.step()
        return self.cost
    
    def predict(self, x):
        self.eval() # set the model to evaluation mode (dropout = False)
        return self.forward(x)
    
    def accuracy(self, x, y):
        prediction = self.predict(x)
        correct = (prediction == y).float()
        accuracy = correct.mean()
        return accuracy
        

In [12]:
#model
model = CNNClass()
model.cuda()

CNNClass(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.30000000000000004)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.30000000000000004)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.30000000000000004)
  )
  (fc1): Linear(in_features=2048, out_features=625, bias=True)
  (layer4): Sequential(
    (0): Linear(in_features=2048, out_features=625, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.30000000000000004)
  )
  (fc2): Linear(in_features=625, out_features=10, b

In [6]:
epochs = 10

for epoch in range(1, epochs+1):
    avg_cost = 0
    total_batch = len(mnist_trainset) // batch_size
    for i, (batch_xs, batch_ys) in enumerate(traindata):
        X = Variable(batch_xs.cuda())
        Y = Variable(batch_ys.cuda())
        
        cost = model.train_(X, Y)
        
        avg_cost = cost / total_batch
        
    print('Epoch {}, cost = {}'.format(epoch, avg_cost))

Epoch 1, cost = 7.820447649464768e-07
Epoch 2, cost = 0.0001486598775954917
Epoch 3, cost = 9.090662160815555e-07
Epoch 4, cost = 3.6160152205866325e-08
Epoch 5, cost = 1.6715684978407808e-06
Epoch 6, cost = 1.9720396267075557e-06
Epoch 7, cost = 9.77579702521325e-07
Epoch 8, cost = 2.3463566378723044e-07
Epoch 9, cost = 1.0434587238705717e-06
Epoch 10, cost = 6.192246928549139e-06
