In [5]:
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import torch.nn.functional as F
import numpy as np
from tqdm.notebook import tqdm 


# --- Run configuration -------------------------------------------------------
EPOCH = 40               # number of full passes over the training set
LR = 0.012               # learning rate handed to the SGD optimiser
DOWNLOAD_MNIST = True    # let torchvision fetch MNIST into ./mnist/ when missing
SEED = 1314

# Seed both RNGs this notebook draws from: torch (weight init, dropout) and
# numpy (the training cell shuffles sample indices with np.random.permutation).
torch.manual_seed(SEED)
np.random.seed(SEED)

# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Data --------------------------------------------------------------------
train_data = torchvision.datasets.MNIST(
    root='./mnist/',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

print(train_data.data.shape)

# The image tensors are read straight from `.data`, so the channel axis is
# added and the pixel values are scaled by 1/255 by hand here.
train_x = train_data.data.unsqueeze(1).float() / 255.
train_y = train_data.targets
print(train_x.shape)

# Only the first 2000 test images are kept; they are evaluated in one forward
# pass during training, so they are moved to the compute device once, up front.
test_x = (test_data.data.unsqueeze(1).float()[:2000] / 255.).to(device)
test_y = test_data.targets[:2000].to(device)

# Network 1 
class CNN(nn.Module):
    """Three-stage convolutional classifier producing 10 class logits.

    Each convolutional stage is Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d(2);
    the third stage additionally ends with Dropout. For a 1x28x28 input the
    spatial size goes 28 -> 14 -> 7 -> 4 (the third convolution uses a 3x3
    kernel with padding 2, so 7 grows to 9 before pooling), which yields the
    64 * 4 * 4 features expected by the fully connected head.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels, 5x5 kernel, size-preserving padding.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Stage 2: 16 -> 32 channels, same kernel/padding as stage 1.
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Stage 3: 32 -> 64 channels, 3x3 kernel with padding 2, then dropout.
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(),
        )

        # Hidden fully connected layer on the flattened feature map.
        self.fc1 = nn.Sequential(
            nn.Linear(64 * 4 * 4, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(),
        )

        # Final projection to the 10 class scores (raw logits, no softmax).
        self.out = nn.Linear(256, 10)

    def forward(self, x):
        """Return the (batch, 10) logits for a batch of images `x`."""
        features = x
        for stage in (self.conv1, self.conv2, self.conv3):
            features = stage(features)

        # Collapse channel and spatial axes into one feature vector per sample.
        flat = features.view(features.size(0), -1)
        hidden = self.fc1(flat)
        return self.out(hidden)

# Instantiate the network and show its layer-by-layer summary.
# (For per-parameter sizes, iterate over cnn.named_parameters().)
cnn = CNN()
print(repr(cnn))



torch.Size([60000, 28, 28])
torch.Size([60000, 1, 28, 28])
CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.5, inplace=False)
  )
  (fc1): Sequential(
    

In [6]:
# Fresh network, optimiser and loss for this training run.
model = CNN()
model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

data_size = train_x.size(0)  # number of training samples, read from the data itself
batch_size = 150
eval_every = 100             # evaluate on the held-out slice every this many batches

for epoch in tqdm(range(EPOCH)):
    # Reshuffle the sample order every epoch; a trailing partial batch is dropped.
    random_indx = np.random.permutation(data_size)
    model.train()
    for batch_i in range(data_size // batch_size):
        indx = random_indx[batch_i * batch_size:(batch_i + 1) * batch_size]

        # The training set stays on the CPU; only the current batch is moved.
        b_x = train_x[indx].to(device)
        b_y = train_y[indx].to(device)

        optimizer.zero_grad()
        output = model(b_x)
        loss = loss_func(output, b_y)
        loss.backward()
        optimizer.step()

        if batch_i % eval_every == 0:
            # Evaluation mode: dropout off, BatchNorm uses its running statistics.
            model.eval()
            with torch.no_grad():
                test_output = model(test_x)
                pred_y = test_output.argmax(dim=1)
                accuracy = (pred_y == test_y).float().mean().item()
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(), '| test accuracy: %.3f' % accuracy)
            # Switch back before the next optimisation step.
            model.train()

# The loop leaves the model in training mode, so switch to eval mode explicitly:
# otherwise these predictions would run with dropout active and BatchNorm
# normalising by (and updating its running stats from) this 10-sample batch.
model.eval()
with torch.no_grad():
    test_output = model(test_x[:10])
    pred_y = test_output.argmax(dim=1)

print(pred_y, 'prediction number')
print(test_y[:10], 'real number')

  0%|          | 0/40 [00:00<?, ?it/s]

Epoch:  0 | train loss: 2.4949 | test accuracy: 0.079
Epoch:  0 | train loss: 0.9415 | test accuracy: 0.851
Epoch:  0 | train loss: 0.5668 | test accuracy: 0.919
Epoch:  0 | train loss: 0.3542 | test accuracy: 0.941
Epoch:  1 | train loss: 0.2664 | test accuracy: 0.946
Epoch:  1 | train loss: 0.3013 | test accuracy: 0.953
Epoch:  1 | train loss: 0.2470 | test accuracy: 0.955
Epoch:  1 | train loss: 0.1483 | test accuracy: 0.962
Epoch:  2 | train loss: 0.1287 | test accuracy: 0.965
Epoch:  2 | train loss: 0.1932 | test accuracy: 0.968
Epoch:  2 | train loss: 0.2644 | test accuracy: 0.970
Epoch:  2 | train loss: 0.1774 | test accuracy: 0.970
Epoch:  3 | train loss: 0.1493 | test accuracy: 0.974
Epoch:  3 | train loss: 0.1391 | test accuracy: 0.975
Epoch:  3 | train loss: 0.1620 | test accuracy: 0.975
Epoch:  3 | train loss: 0.0809 | test accuracy: 0.978
Epoch:  4 | train loss: 0.1182 | test accuracy: 0.979
Epoch:  4 | train loss: 0.1202 | test accuracy: 0.979
Epoch:  4 | train loss: 0.11