In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import numpy as np

class CNN(nn.Module):
    """Four-convolution classifier for single-channel images.

    ``fc1`` is sized for a 32-channel 7x7 feature map, which is what the two
    2x2 max-pools in ``last_hidden_layer_output`` produce from a 28x28 input
    (28 -> 14 -> 7); the 3x3 convolutions use padding=1 and therefore keep
    the spatial size.
    """

    def __init__(self):
        super().__init__()

        # NOTE: the attribute names are the state-dict keys and the creation
        # order fixes the seeded initialisation, so keep both unchanged.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(7*7*32, 100)
        self.fc2 = nn.Linear(100, 10)
        # One dropout module, applied after both conv3 and conv4.
        self.drop_layer = nn.Dropout(p=0.2)

    def last_hidden_layer_output(self, x):
        """Return the ReLU-activated output of ``fc1`` (100 features per sample).

        Raises:
            RuntimeError: if the feature map reaching the flatten step is not
                32x7x7, i.e. the input was not 28x28.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = self.drop_layer(F.relu(self.conv3(x)))
        x = self.drop_layer(F.relu(self.conv4(x)))
        # view(-1, ...) infers the row count from the total element count, so
        # a wrongly sized feature map would be silently regrouped across
        # samples instead of failing. Check the shape explicitly first.
        if tuple(x.shape[-3:]) != (32, 7, 7):
            raise RuntimeError(
                "CNN expects 28x28 inputs (32x7x7 feature map before fc1), "
                "got feature map of shape {}".format(tuple(x.shape[-3:]))
            )
        x = x.view(-1, 7*7*32)
        x = F.relu(self.fc1(x))
        return x

    def forward(self, x):
        """Return the 10 unnormalised class scores produced by ``fc2``."""
        x = self.last_hidden_layer_output(x)
        x = self.fc2(x)
        return x

# Number of samples per mini-batch for both loaders.
batch_size = 64

# MNIST train/test splits, downloaded into ./data on first use; every image
# is passed through ToTensor before being handed to the model.
mnist_train = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Only the training loader shuffles its samples.
train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=batch_size, shuffle=False)

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        # Keep the leading dimension and let view() infer the rest.
        leading = x.size(0)
        return x.view(leading, -1)

# Seed the default and the CUDA random number generators with one fixed value.
_SEED = 2
torch.manual_seed(_SEED)
torch.cuda.manual_seed(_SEED)

# Softmax taken along dimension 1 of its input.
softmax = nn.Softmax(dim=1)

def adjust_learning_rate(optimizer, epoch, base_lr=None, decay=0.5, step=10):
    """Set a step-decayed learning rate on every parameter group of *optimizer*.

    The rate written is ``base_lr * decay ** (epoch // step)``; with the
    defaults the initial rate is halved every 10 epochs.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts
            whose ``'lr'`` entry is overwritten.
        epoch: integer epoch index.
        base_lr: initial learning rate. When ``None`` the module-level
            ``learning_rate`` is used, which keeps the two-argument call
            working as before.
        decay: multiplicative factor applied once per ``step`` epochs.
        step: number of epochs between two decays.
    """
    if base_lr is None:
        # Looked up at call time, so the global may be defined after this
        # function as long as it exists before the first call.
        base_lr = learning_rate
    lr = base_lr * (decay ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# Dropout layer types recognised by type rather than by class name.
_DROPOUT_TYPES = (
    nn.Dropout,
    nn.Dropout2d,
    nn.Dropout3d,
    nn.AlphaDropout,
    nn.FeatureAlphaDropout,
)


def enable_dropout(model):
    """ Put every dropout layer of *model* into training mode.

    Intended for use after ``model.eval()`` so that dropout stays active at
    test time while all other modules keep their current mode.

    A module is treated as a dropout layer when it is an instance (or
    subclass instance) of one of ``_DROPOUT_TYPES``, or when its class name
    starts with ``'Dropout'``. The name test is kept so that everything the
    previous name-only check matched is still matched.
    """
    for m in model.modules():
        if isinstance(m, _DROPOUT_TYPES) or type(m).__name__.startswith('Dropout'):
            m.train()

def epoch(loader, model, opt=None):
    """Run one pass over *loader*, training when *opt* is given, else evaluating.

    Args:
        loader: iterable yielding ``(X, y)`` batches and exposing a sized
            ``dataset`` attribute.
        model: module mapping a batch ``X`` to per-class scores with the
            classes along dimension 1.
        opt: optimizer used to update *model*. When ``None`` the model is put
            in eval mode and no gradients are computed.

    Returns:
        Tuple ``(error_rate, mean_loss)``: misclassified samples and summed
        per-sample loss, each divided by ``len(loader.dataset)``.
    """
    training = opt is not None
    if training:
        model.train()
    else:
        model.eval()

    total_loss, total_err = 0., 0.

    # Autograd is only needed when parameters are updated; disabling it for
    # evaluation avoids building graphs that are never back-propagated.
    with torch.set_grad_enabled(training):
        for X, y in loader:
            # `device` is the module-level torch.device.
            X, y = X.to(device), y.to(device)
            yp = model(X)
            loss = F.nll_loss(F.log_softmax(yp, dim=1), y)
            if training:
                opt.zero_grad()
                loss.backward()
                opt.step()

            # Index of the highest score is the predicted class.
            total_err += (yp.max(dim=1)[1] != y).sum().item()
            # The loss is a batch mean; weight by batch size so the final
            # division yields a per-sample average.
            total_loss += loss.item() * X.shape[0]
    return total_err / len(loader.dataset), total_loss / len(loader.dataset)

# Build the network and move it to the selected device.
model_cnn = CNN()
model_cnn = model_cnn.to(device)

# Initial learning rate; also read by adjust_learning_rate() as a global.
learning_rate = 0.01

opt = optim.SGD(model_cnn.parameters(), lr=learning_rate, momentum=0.9)

# Train for 30 epochs, printing "<train error>\t<test error>" after each one.
for t in range(30):
    adjust_learning_rate(opt, t)
    train_err, train_loss = epoch(train_loader, model_cnn, opt)
    test_err, test_loss = epoch(test_loader, model_cnn)
    print("{:.6f}\t{:.6f}".format(train_err, test_err))

checkpoint_path = "model_cnn_mnist_digit.pt"
torch.save(model_cnn.state_dict(), checkpoint_path)

# map_location lets a checkpoint written on a CUDA device be restored on a
# host without CUDA (e.g. when only this part is re-run elsewhere).
model_cnn.load_state_dict(torch.load(checkpoint_path, map_location=device))
model_cnn.eval()


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


0.271583	0.034700
0.036650	0.026800
0.026950	0.020400
0.021033	0.016800
0.017583	0.014900
0.015567	0.013200
0.013700	0.013800
0.012267	0.012100
0.010883	0.012400
0.009800	0.010500
0.007517	0.009800
0.006250	0.009500
0.006833	0.009000
0.006583	0.008600
0.006100	0.009700
0.005833	0.008700
0.005017	0.008800
0.005450	0.009300
0.004950	0.009200
0.004600	0.008200
0.003950	0.007900
0.003483	0.008000
0.002850	0.008200
0.003333	0.008200
0.003383	0.008400
0.002783	0.008100
0.003133	0.007700
0.003317	0.008300
0.002783	0.007700
0.002850	0.007400


CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1568, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
  (drop_layer): Dropout(p=0.2, inplace=False)
)