In [1]:
import os
os.system('pip3 install seaborn')

0

In [2]:
## Importing required packages
import matplotlib
matplotlib.use('Agg')

### Importing torch packages
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

## Importing python packages
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### To test whether GPU instance is present in the system of not.
cuda = torch.cuda.is_available()
print('Using PyTorch version:', torch.__version__, 'CUDA:', cuda)
### If cuda is a gpu instance. If it's false then we run the program on CPU
### If cuda is a gpu instance. If it's true then we run the program on GPU
torch.manual_seed(42)
if cuda:
    torch.cuda.manual_seed(42)

Using PyTorch version: 0.3.1 CUDA: False


Now, we'll load the MNIST data. First time we may have to download the data, which can take a while.

In [3]:
### Initializing batch size
batch_size = 32

kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

## Loading the train set file
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True, **kwargs)

## Loading the test set file
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True, **kwargs)

The train and test data are provided via data loaders that provide iterators over the datasets. The first element of training data (X_train) is a 4th-order tensor of size (batch_size, 1, 28, 28), i.e. it consists of a batch of images of size 1x28x28 pixels. y_train is a vector containing the correct classes ("0", "1", ..., "9") for each training digit.

In [4]:
for (X_train, y_train) in train_loader:
    print('X_train:', X_train.size(), 'type:', X_train.type())
    print('y_train:', y_train.size(), 'type:', y_train.type())
    break

X_train: torch.Size([32, 1, 28, 28]) type: torch.FloatTensor
y_train: torch.Size([32]) type: torch.LongTensor


#### Plotting the  first 10 training digits

In [5]:
pltsize=1
plt.figure(figsize=(10*pltsize, pltsize))

for i in range(10):
    plt.subplot(1,10,i+1)
    plt.axis('off')
    plt.imshow(X_train[i,:,:,:].numpy().reshape(28,28), cmap="gray")
    plt.title('Class: '+str(y_train[i]))

#### MLP network definition
Let's define the network as a Python class. We have to write the __init__() and forward() methods, and PyTorch will automatically generate a backward() method for computing the gradients for the backward pass.

Finally, we define an optimizer to update the model parameters based on the computed gradients. We select stochastic gradient descent (with momentum) as the optimization algorithm, and set learning rate to 0.01. Note that there are several different options for the optimizer in PyTorch that we could use instead of SGD.

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 50)
        self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(50, 50)
        self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = self.fc1_drop(x)
        x = F.relu(self.fc2(x))
        x = self.fc2_drop(x)
        return F.log_softmax(self.fc3(x))

model = Net()
if cuda:
    model.cuda()
    
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

print(model)

Net(
  (fc1): Linear(in_features=784, out_features=50, bias=True)
  (fc1_drop): Dropout(p=0.2)
  (fc2): Linear(in_features=50, out_features=50, bias=True)
  (fc2_drop): Dropout(p=0.2)
  (fc3): Linear(in_features=50, out_features=10, bias=True)
)


#### Learning
Let's now define functions to train() and test() the model.

In [7]:
def train(epoch, log_interval=100):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

In [8]:
def test(loss_vector, accuracy_vector):
    model.eval()
    test_loss, correct = 0, 0
    for data, target in test_loader:
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(test_loader)
    loss_vector.append(test_loss)

    accuracy = 100. * correct / len(test_loader.dataset)
    accuracy_vector.append(accuracy)
    
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), accuracy))

Now we are ready to train our model using the train() function. An epoch means one pass through the whole training data. After each epoch, we evaluate the model using test().

In [9]:
os.system('date')

0

In [10]:
epochs = 10

lossv, accv = [], []
for epoch in range(1, epochs + 1):
    train(epoch)
    test(lossv, accv)

  app.launch_new_instance()



Test set: Average loss: 0.2322, Accuracy: 9305/10000 (93%)


Test set: Average loss: 0.1754, Accuracy: 9446/10000 (94%)


Test set: Average loss: 0.1563, Accuracy: 9514/10000 (95%)


Test set: Average loss: 0.1371, Accuracy: 9567/10000 (96%)


Test set: Average loss: 0.1274, Accuracy: 9607/10000 (96%)


Test set: Average loss: 0.1248, Accuracy: 9615/10000 (96%)


Test set: Average loss: 0.1165, Accuracy: 9648/10000 (96%)


Test set: Average loss: 0.1079, Accuracy: 9667/10000 (97%)




Test set: Average loss: 0.1076, Accuracy: 9660/10000 (97%)


Test set: Average loss: 0.1079, Accuracy: 9678/10000 (97%)



In [None]:
os.system('date')