In [18]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
# Print the installed PyTorch version so it is recorded in the cell output.
print("PyTorch Version: ",torch.__version__)

PyTorch Version:  1.4.0+cu100


In [17]:
class Net(nn.Module):
    """Small LeNet-style CNN mapping 1x28x28 images to 10 class log-probabilities.

    Shape flow for an input batch of shape (N, 1, 28, 28):
        conv1 (5x5, stride 1) + ReLU -> (N, 20, 24, 24)
        max-pool 2x2, stride 2       -> (N, 20, 12, 12)
        conv2 (5x5, stride 1) + ReLU -> (N, 50, 8, 8)
        max-pool 2x2, stride 2       -> (N, 50, 4, 4)
        flatten                      -> (N, 800)
        fc1 + ReLU                   -> (N, 500)
        fc2 + log_softmax            -> (N, 10)
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)   # 28 x 28 -> (28 - 5 + 1) = 24 x 24
        self.conv2 = nn.Conv2d(20, 50, 5, 1)  # pooled 12 x 12 -> (12 - 5 + 1) = 8 x 8
        self.fc1 = nn.Linear(4*4*50, 500)     # 50 channels of 4 x 4 after the second pool
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch x of shape (N, 1, 28, 28).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield 4*4*50
                features per sample (raised by ``fc1``).
        """
        x = F.relu(self.conv1(x))    # N x 20 x 24 x 24
        x = F.max_pool2d(x, 2, 2)    # N x 20 x 12 x 12
        x = F.relu(self.conv2(x))    # N x 50 x 8 x 8
        x = F.max_pool2d(x, 2, 2)    # N x 50 x 4 x 4
        # Flatten per sample and keep the batch dimension fixed. A bare
        # view(-1, 800) would silently fold features of differently sized
        # inputs into extra "samples" instead of failing.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        # Log-probabilities, to be paired with F.nll_loss.
        return F.log_softmax(x, dim=1)
    

In [24]:
# MNIST training split stored under ./mnist_data (downloaded on demand);
# ToTensor is the only transform applied to each image.
mnist_data = datasets.MNIST(
    root="./mnist_data",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
mnist_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./mnist_data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [25]:
# Shape of element 0 (the image tensor) of sample 5.
mnist_data[5][0].shape

torch.Size([1, 28, 28])

In [26]:
# Collect every image of the dataset as a NumPy array (labels are dropped)
# so that dataset-wide statistics can be computed with NumPy.
data = [image.data.cpu().numpy() for image, _ in mnist_data]

In [28]:
# Standard deviation over all values in `data`; rounded (0.3081) it is the
# std passed to transforms.Normalize in the data-loader cell.
np.std(data)

0.30810776

In [30]:
# Mean over all values in `data`; rounded (0.1307) it is the mean passed to
# transforms.Normalize in the data-loader cell.
np.mean(data)

0.13066062

In [29]:
# Shape of element 0 (the image tensor) of sample 230, as a second spot check.
mnist_data[230][0].shape

torch.Size([1, 28, 28])

In [31]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: module whose forward output is fed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.

    Prints the current batch loss every 100 iterations (including iteration 0).
    """
    log_every = 100
    model.train()

    for step, batch in enumerate(train_loader):
        inputs = batch[0].to(device)
        labels = batch[1].to(device)

        # Clear stale gradients, then forward / backward / parameter update.
        optimizer.zero_grad()
        log_probs = model(inputs)  # batch_size * 10
        batch_loss = F.nll_loss(log_probs, labels)
        batch_loss.backward()
        optimizer.step()

        if step % log_every != 0:
            continue
        message = "Train Epoch: {}, iteration: {}, Loss: {}".format(
            epoch, step, batch_loss.item())
        print(message)

In [32]:
def test(model, device, test_loader):
    model.eval()
    total_loss = 0.
    correct = 0.
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            output = model(data) # batch_size * 10
            total_loss += F.nll_loss(output, target, reduction="sum").item() 
            pred = output.argmax(dim=1) # batch_size * 1
            correct += pred.eq(target.view_as(pred)).sum().item()
            

    total_loss /= len(test_loader.dataset)
    acc = correct/len(test_loader.dataset) * 100.
    print("Test loss: {}, Accuracy: {}".format(total_loss, acc))

In [35]:
# --- Configuration -----------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32
lr = 0.01
momentum = 0.5
num_epochs = 2

# --- Data --------------------------------------------------------------------
# One preprocessing pipeline shared by both splits so they cannot drift apart.
# 0.1307 / 0.3081 are the rounded mean / std of the training pixels computed
# with np.mean(data) / np.std(data) earlier in this notebook.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataloader = torch.utils.data.DataLoader(
    datasets.MNIST("./mnist_data", train=True, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=True,
    num_workers=1, pin_memory=True
)
# Evaluation metrics are sums over the whole split, so sample order does not
# matter; shuffling here would only consume random numbers for no benefit.
test_dataloader = torch.utils.data.DataLoader(
    datasets.MNIST("./mnist_data", train=False, download=True,
                   transform=mnist_transform),
    batch_size=batch_size, shuffle=False,
    num_workers=1, pin_memory=True
)

# --- Model and optimizer -----------------------------------------------------
model = Net().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

# --- Train, evaluating on the test split after every epoch -------------------
for epoch in range(num_epochs):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)

# Persist only the learned parameters (state_dict), not the whole module.
torch.save(model.state_dict(), "mnist_cnn.pt")

Train Epoch: 0, iteration: 0, Loss: 2.2923946380615234
Train Epoch: 0, iteration: 100, Loss: 0.7753957509994507
Train Epoch: 0, iteration: 200, Loss: 0.27749717235565186
Train Epoch: 0, iteration: 300, Loss: 0.21920061111450195
Train Epoch: 0, iteration: 400, Loss: 0.2233399748802185
Train Epoch: 0, iteration: 500, Loss: 0.0768377035856247
Train Epoch: 0, iteration: 600, Loss: 0.10782098770141602
Train Epoch: 0, iteration: 700, Loss: 0.23150363564491272
Train Epoch: 0, iteration: 800, Loss: 0.04292915388941765
Train Epoch: 0, iteration: 900, Loss: 0.11177463084459305
Train Epoch: 0, iteration: 1000, Loss: 0.09185297042131424
Train Epoch: 0, iteration: 1100, Loss: 0.16677334904670715
Train Epoch: 0, iteration: 1200, Loss: 0.08935216069221497
Train Epoch: 0, iteration: 1300, Loss: 0.01590600609779358
Train Epoch: 0, iteration: 1400, Loss: 0.14203670620918274
Train Epoch: 0, iteration: 1500, Loss: 0.058041635900735855
Train Epoch: 0, iteration: 1600, Loss: 0.12018963694572449
Train Epoch: