In [29]:
!pip install torchvision torchsummary

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting torchsummary
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/7d/18/1474d06f721b86e6a9b9d7392ad68bed711a02f3b61ac43f13c719db50a6/torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1


In [3]:
import torch
from torch import nn
from torchvision import datasets, transforms

In [9]:
import os 

os.environ['HTTP_PROXY'] = 'http://127.0.0.1:7890'
os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:7890'

In [10]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [11]:
device

'cuda'

## load data

In [12]:
train_dataset = datasets.MNIST(root='../data/', download=True, train=True, transform=transforms.ToTensor())
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ../data/
    Split: Train
    StandardTransform
Transform: ToTensor()

In [13]:
test_dataset = datasets.MNIST(root='../data/', download=True, train=False, transform=transforms.ToTensor())
test_dataset

Dataset MNIST
    Number of datapoints: 10000
    Root location: ../data/
    Split: Test
    StandardTransform
Transform: ToTensor()

In [14]:
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [18]:
# batch_size, channels, h, w
images, labels = next(iter(train_dataloader))
images.shape

torch.Size([64, 1, 28, 28])

## build model

The formula for output height and width:
$$ ( H_{out} = \left\lfloor \frac{H_{in} + 2p - d(k-1) - 1}{s} + 1 \right\rfloor ) $$
$$ ( W_{out} = \left\lfloor \frac{W_{in} + 2p - d(k-1) - 1}{s} + 1 \right\rfloor ) $$

Substituting values:
$$ ( H_{out} = \left\lfloor \frac{32 + 2(1) - 1(3-1) - 1}{1} + 1 \right\rfloor = 32 ) $$
$$ ( W_{out} = \left\lfloor \frac{32 + 2(1) - 1(3-1) - 1}{1} + 1 \right\rfloor = 32 ) $$

### Output Shape:
$$ ((N, C_{out}, H_{out}, W_{out}) = (8, 16, 32, 32)) $$

In [65]:
class CNN(nn.Module):
    def __init__(self, n_channel, n_classes):
        super(CNN, self).__init__()
        # conv2d: (b, 1, 28, 28) -> (b, 16, 28, 28)
        # maxpool2d: (b, 16, 28, 28) -> (b, 16, 14, 14)
        # p=2, d=1, k=5, s=1
        self.cnn1 = nn.Sequential(
            nn.Conv2d(n_channel, 16, kernel_size=5, padding=2, stride=1, dilation=1), # lower[(28+2*2-1*(5-1)-1)/1+1]=28
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2) # lower[(28+2*1-1*(2-1)-1)/2+1]=14
        )
        
        self.cnn2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2, stride=1, dilation=1), # lower[(14+2*2-1*(5-1)-1)/1+1]=14
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2), # lower[(14+2*0-1*(2-1)-1)/2+1]=7
        )
        
        self.fc1 = nn.LazyLinear(n_classes)
        
    def forward(self, x):
        out = self.cnn1(x)
        out = self.cnn2(out)
        # (b, 32, 7, 7) -> (b, 32*7*7)
        out = out.reshape(out.shape[0], -1)
        out = self.fc1(out)
        return out

In [66]:
torch.randn(32,16,7,7).reshape(32,-1).shape

torch.Size([32, 784])

In [67]:
model = CNN(n_channel=1, n_classes=10).to(device)
model

CNN(
  (cnn1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (cnn2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): LazyLinear(in_features=0, out_features=10, bias=True)
)

In [68]:
from torchsummary import summary
summary(model, input_size=(1, 28, 28), batch_size=64)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [64, 16, 28, 28]             416
       BatchNorm2d-2           [64, 16, 28, 28]              32
              ReLU-3           [64, 16, 28, 28]               0
         MaxPool2d-4           [64, 16, 14, 14]               0
            Conv2d-5           [64, 32, 14, 14]          12,832
       BatchNorm2d-6           [64, 32, 14, 14]              64
              ReLU-7           [64, 32, 14, 14]               0
         MaxPool2d-8             [64, 32, 7, 7]               0
            Linear-9                   [64, 10]          15,690
Total params: 29,034
Trainable params: 29,034
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 29.86
Params size (MB): 0.11
Estimated Total Size (MB): 30.17
-------------------------------------------

## model train

In [69]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [71]:
for epoch in range(5):
    for i, (images, labels) in enumerate(train_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print(f'Epoch {epoch+1}/{5}, Step {i+1}/{len(train_dataloader)}, Loss {loss.item()}')


Epoch 1/5, Step 100/938, Loss 0.006654500495642424
Epoch 1/5, Step 200/938, Loss 0.007310059387236834
Epoch 1/5, Step 300/938, Loss 0.018969446420669556
Epoch 1/5, Step 400/938, Loss 0.002156329806894064
Epoch 1/5, Step 500/938, Loss 0.0036852150224149227
Epoch 1/5, Step 600/938, Loss 0.003013398265466094
Epoch 1/5, Step 700/938, Loss 0.002072608796879649
Epoch 1/5, Step 800/938, Loss 0.005618400871753693
Epoch 1/5, Step 900/938, Loss 0.04302789643406868
Epoch 2/5, Step 100/938, Loss 0.001906794961541891
Epoch 2/5, Step 200/938, Loss 0.014682379551231861
Epoch 2/5, Step 300/938, Loss 0.01252888422459364
Epoch 2/5, Step 400/938, Loss 0.04672843590378761
Epoch 2/5, Step 500/938, Loss 0.025934644043445587
Epoch 2/5, Step 600/938, Loss 0.02330837957561016
Epoch 2/5, Step 700/938, Loss 0.11055096238851547
Epoch 2/5, Step 800/938, Loss 0.14090947806835175
Epoch 2/5, Step 900/938, Loss 0.03466073423624039
Epoch 3/5, Step 100/938, Loss 0.030125921592116356
Epoch 3/5, Step 200/938, Loss 0.00052

## model evaluation

In [72]:
total = 0
correct = 0

for images, labels in test_dataloader:
    images = images.to(device)
    labels = labels.to(device)
    
    outputs = model(images)
    predicted = torch.argmax(outputs, dim=1)
    
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print(f'Accuracy: {100*correct/total}%')

Accuracy: 99.1%


## model save

In [None]:
torch.save(model.state_dict(), 'mnist_cnn.pth')

AttributeError: 'CNN' object has no attribute 'save'