In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import numpy as np
%matplotlib inline

In [44]:
# Cell-local imports, grouped at the top of the cell so its dependencies are visible.
import gzip, pickle
from torch.utils.data import TensorDataset

# Hyperparameters shared by the cells below.
input_size = 28   # image side length; matches the 1x28x28 reshape used in the training cell
output_size = 10  # width of the classifier output (presumably the ten digit classes)
outpu_size = output_size  # original misspelled name, kept so existing references keep working
epochs = 3
batch_size = 64

# Alternative data source (not used here): torchvision's datasets.MNIST('./data', ...)
# with transform=transforms.ToTensor().

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load this archive if it comes from a trusted source.
# encoding='latin-1' is presumably required because the archive was pickled
# under Python 2 — confirm against the data provenance.
with gzip.open('data/mnist.pkl.gz', 'rb') as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')

# Convert every loaded array to a torch tensor (torch.tensor copies the data).
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)

train_ds = TensorDataset(x_train, y_train)
test_ds = TensorDataset(x_valid, y_valid)  # built from the validation split of the archive

# Only the training loader is shuffled. Evaluation metrics do not depend on
# batch order, and an unshuffled validation loader keeps evaluation
# deterministic and avoids drawing from the global RNG on every validation pass.
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=batch_size*2, shuffle=False)

In [48]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier for 28x28 images.

    Each stage is Conv2d(kernel 5, stride 1, padding 2) -> ReLU -> MaxPool2d(2).
    The padding keeps the spatial size through the convolution and each pool
    halves it, so a 28x28 input becomes 32 feature maps of 7x7 before the
    final linear layer.

    Args:
        in_channels: number of channels in the input images (default 1).
        num_classes: number of output logits (default 10).

    The defaults reproduce the original fixed architecture, and the submodule
    names (``conv1``, ``conv2``, ``out``) are unchanged so existing
    ``state_dict`` checkpoints still load.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels, out_channels=16, kernel_size=5, stride=1, padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # 28 -> 14 -> 7 after the two pooling layers, with 32 channels.
        self.out = nn.Linear(7 * 7 * 32, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        ``x`` must be shaped (batch, in_channels, 28, 28); the linear layer's
        input width is fixed to the 7x7x32 feature volume that size produces.
        """
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten() keeps the batch dimension and, unlike view(), does not
        # require the tensor to be contiguous in memory.
        x = torch.flatten(x, start_dim=1)
        return self.out(x)
    

In [14]:
def accuracy(prediction, labels):
    """Count how many predictions match their labels.

    Args:
        prediction: 2-D tensor of per-class scores, one row per sample.
        labels: tensor of target class indices, one entry per sample.

    Returns:
        A ``(num_correct, num_samples)`` tuple, where ``num_correct`` is a
        0-dim tensor and ``num_samples`` is ``len(labels)``.
    """
    # The predicted class is the index of the largest score in each row.
    _, predicted_classes = prediction.data.max(dim=1)
    # Align the label shape with the predictions before comparing.
    targets = labels.data.view_as(predicted_classes)
    num_correct = (predicted_classes == targets).sum()
    return num_correct, len(labels)

In [51]:
# Build the model, loss and optimizer, then train for `epochs` passes over
# `train_dl`, reporting running training accuracy and full validation
# accuracy every 100 batches.
net = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr = 0.001)

for epoch in range(epochs):
    # (num_correct, num_samples) per training batch, reset at each epoch.
    train_rights = []
    for bi, (data, target) in enumerate(train_dl):
        # Re-enter training mode every step because the periodic evaluation
        # below switches the network to eval mode.
        net.train()
        data = data.reshape(-1, 1, 28, 28)
        output = net(data)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_rights.append(accuracy(output, target))

        if bi % 100 == 0:
            net.eval()
            val_rights = []

            # Evaluation needs no gradients: no_grad() skips building the
            # autograd graph, saving memory and time. Separate variable names
            # keep the training step's data/target/output intact.
            with torch.no_grad():
                for val_data, val_target in test_dl:
                    val_output = net(val_data.reshape(-1, 1, 28, 28))
                    val_rights.append(accuracy(val_output, val_target))

            # Aggregate the per-batch counts into (num_correct, num_samples) totals.
            train_r = (sum(tup[0] for tup in train_rights), sum(tup[1] for tup in train_rights))
            test_r = (sum(tup[0] for tup in val_rights), sum(tup[1] for tup in val_rights))

            # Format string is intentionally unchanged so the log matches the
            # previously recorded output.
            print('epoch={}, [{}/{}] ({:.0f}%) \t loss = {:.6f} \t trail_r = {:.2f}, \t test_r={:.2f}'.format(
                epoch, bi*batch_size, len(train_dl.dataset),
                100. * bi*batch_size/len(train_dl.dataset),
                loss.item(),
                100. * train_r[0].item() / train_r[1],
                100. * test_r[0].item() / test_r[1],
            ))
                

epoch=0, [0/50000] (0%) 	 loss = 2.296804 	 trail_r = 9.38, 	 test_r=9.95
epoch=0, [6400/50000] (13%) 	 loss = 0.397458 	 trail_r = 76.02, 	 test_r=92.84
epoch=0, [12800/50000] (26%) 	 loss = 0.183755 	 trail_r = 84.23, 	 test_r=95.85
epoch=0, [19200/50000] (38%) 	 loss = 0.097302 	 trail_r = 87.82, 	 test_r=96.50
epoch=0, [25600/50000] (51%) 	 loss = 0.124194 	 trail_r = 89.86, 	 test_r=96.92
epoch=0, [32000/50000] (64%) 	 loss = 0.130664 	 trail_r = 91.22, 	 test_r=97.57
epoch=0, [38400/50000] (77%) 	 loss = 0.105614 	 trail_r = 92.21, 	 test_r=97.60
epoch=0, [44800/50000] (90%) 	 loss = 0.014055 	 trail_r = 92.92, 	 test_r=97.61
epoch=1, [0/50000] (0%) 	 loss = 0.024264 	 trail_r = 100.00, 	 test_r=97.36
epoch=1, [6400/50000] (13%) 	 loss = 0.131264 	 trail_r = 97.42, 	 test_r=98.13
epoch=1, [12800/50000] (26%) 	 loss = 0.033087 	 trail_r = 97.50, 	 test_r=98.26
epoch=1, [19200/50000] (38%) 	 loss = 0.084030 	 trail_r = 97.71, 	 test_r=98.38
epoch=1, [25600/50000] (51%) 	 loss = 0.1

In [52]:
# Disabled scratch check: inspect the shape of the first validation sample.
# test_ds[0][0].shape

# Release unused memory held by PyTorch's CUDA caching allocator.
# NOTE(review): no visible cell moves the model or tensors to a CUDA device,
# so this presumably has no effect here — confirm before relying on it.
torch.cuda.empty_cache()