In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with mean 0.1307 and std 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

In [3]:
# Training split of MNIST, stored under ./data (download requested) and
# passed through the preprocessing pipeline defined above.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 113013714.30it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 38346215.20it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 32053779.43it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 13854930.01it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
# Held-out (train=False) split of MNIST, using the same root directory and
# the same preprocessing as the training split.
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

In [5]:
# Batches of 128 samples loaded by two worker processes; only the training
# loader shuffles, so evaluation order stays fixed.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

In [6]:
# Use the first CUDA device when CUDA is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [7]:
class Net(nn.Module):
    """Small convolutional classifier producing 10 raw class scores.

    Layout: two unpadded 3x3 convolutions (1 -> 32 -> 64 channels), a 2x2
    max-pool, channel dropout, then two linear layers (9216 -> 128 -> 10)
    with dropout in between.

    The flatten step hard-codes 64*12*12 features, which corresponds to
    1x28x28 inputs (28 -> 26 -> 24 after the convolutions, 12 after pooling).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.pool = nn.MaxPool2d(2)
        self.dropout1 = nn.Dropout2d(0.25)
        # Classifier head.
        self.dense1 = nn.Linear(64 * 12 * 12, 128)
        self.dropout2 = nn.Dropout(0.5)
        self.dense2 = nn.Linear(128, 10)

    def forward(self, x):
        """Map an image batch ``x`` to unnormalised class scores (no softmax)."""
        feature_maps = F.relu(self.conv2(F.relu(self.conv1(x))))
        feature_maps = self.dropout1(self.pool(feature_maps))
        # Collapse everything except the batch axis into one feature vector.
        flat = feature_maps.view(-1, 64 * 12 * 12)
        hidden = self.dropout2(F.relu(self.dense1(flat)))
        return self.dense2(hidden)

# Build the model and move its parameters to the selected device
# (nn.Module.to returns the module itself), then show the layer summary.
net = Net().to(device)
print(net)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout1): Dropout2d(p=0.25, inplace=False)
  (dense1): Linear(in_features=9216, out_features=128, bias=True)
  (dropout2): Dropout(p=0.5, inplace=False)
  (dense2): Linear(in_features=128, out_features=10, bias=True)
)


In [8]:
import torch.optim as optim
# Cross-entropy on the raw scores returned by the network, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    net.parameters(),
    lr=0.001,
)

In [9]:
def train(model, device, train_loader, optimizer, criterion, epoch, train_losses):
    """Run one training epoch and record the average per-sample loss.

    Args:
        model: module to optimise; switched to training mode here.
        device: device that each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches that also exposes
            a ``dataset`` attribute supporting ``len()``.
        optimizer: optimiser stepped once per batch.
        criterion: callable ``criterion(output, target)`` returning a scalar
            loss tensor; assumed to be the mean over the batch (the default
            for ``nn.CrossEntropyLoss``).
        epoch: epoch index, used only in the log line.
        train_losses: list that the epoch's average loss (a float) is
            appended to.
    """
    model.train()
    train_loss = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        # Every batch builds a fresh graph and backward runs once per batch,
        # so the graph is released here instead of being retained.
        loss.backward()
        optimizer.step()
        # Weight the batch mean by the batch size so a smaller final batch
        # contributes proportionally to the epoch average.
        train_loss += loss.item()*data.size(0)

    train_loss = ((train_loss)/len(train_loader.dataset))
    train_losses.append(train_loss)

    print("Train Epoch: %d Train Loss: %.4f." % (epoch, train_loss))


def test(model, device, test_loader, criterion, epoch, test_losses, accuracies):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss = (test_loss/len(test_loader.dataset))

    accuracy = (100*correct)/len(test_loader.dataset)
    accuracies.append(accuracy)
    test_losses.append(test_loss)

    print("Test Epoch: %d Test Loss: %.4f Test Accuray: %.2f." %
          (epoch, test_loss, accuracy))


def fit(model, device, train_loader, test_loader, optimizer, criterion, no_of_epochs):
    """Alternate one training pass and one evaluation pass per epoch.

    Epochs are numbered from 0 to ``no_of_epochs - 1``. Returns a tuple
    ``(train_losses, test_losses, accuracies)`` of lists, each filled in by
    ``train`` / ``test`` with one entry per epoch.
    """
    train_history, test_history, accuracy_history = [], [], []
    for epoch_idx in range(no_of_epochs):
        # Optimise on the training loader, then score on the test loader.
        train(model, device, train_loader, optimizer, criterion,
              epoch_idx, train_history)
        test(model, device, test_loader, criterion,
             epoch_idx, test_history, accuracy_history)
    return train_history, test_history, accuracy_history

In [10]:
# Train for 10 epochs, evaluating on the test loader after each one.
train_losses, test_losses, accuracies = fit(
    model=net,
    device=device,
    train_loader=trainloader,
    test_loader=testloader,
    optimizer=optimizer,
    criterion=criterion,
    no_of_epochs=10,
)

Train Epoch: 0 Train Loss: 0.2271.
Test Epoch: 0 Test Loss: 0.0004 Test Accuray: 98.47.
Train Epoch: 1 Train Loss: 0.0891.
Test Epoch: 1 Test Loss: 0.0003 Test Accuray: 98.70.
Train Epoch: 2 Train Loss: 0.0669.
Test Epoch: 2 Test Loss: 0.0003 Test Accuray: 98.83.
Train Epoch: 3 Train Loss: 0.0568.
Test Epoch: 3 Test Loss: 0.0002 Test Accuray: 99.04.
Train Epoch: 4 Train Loss: 0.0487.
Test Epoch: 4 Test Loss: 0.0002 Test Accuray: 99.06.
Train Epoch: 5 Train Loss: 0.0418.
Test Epoch: 5 Test Loss: 0.0002 Test Accuray: 99.03.
Train Epoch: 6 Train Loss: 0.0368.
Test Epoch: 6 Test Loss: 0.0002 Test Accuray: 99.14.
Train Epoch: 7 Train Loss: 0.0365.
Test Epoch: 7 Test Loss: 0.0002 Test Accuray: 99.14.
Train Epoch: 8 Train Loss: 0.0305.
Test Epoch: 8 Test Loss: 0.0002 Test Accuray: 99.12.
Train Epoch: 9 Train Loss: 0.0295.
Test Epoch: 9 Test Loss: 0.0002 Test Accuray: 99.29.
