# Quick Introduction to PyTorch

First, lets load a dataset. This is extremely easy with the loaders provided by PyTorch (we will not focus on these, since they are not usually used in Deep RL)

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

In [2]:
train_set = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_set = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True, num_workers=1)


Defining a network is quite straightforward:

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.fc1 = nn.Linear(64 * 5 * 5, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 64 * 5 * 5)
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc2(x))
        return x


net = Net()

# Move the network to the GPU
net = net.cuda()

The nn.Module takes cares of most of the logistics (e.g., keeping track of the parameters of the network, etc.). We should also define the metric according to which the network will be optimized, as well the optimizer that will be used.

In [4]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

Note that the nn.CrossEntropyLoss() combines the Softmax activation and the cross-entropy loss into one.

We are now ready to train the network. Note that we have to write the actual training loop.

In [5]:
def train_network(net, loader, criterion, optimizer, iters=30):
    for epoch in range(iters): 

        current_loss = 0.0
        for data in loader:

            inputs, labels = data

            #Move the data to the GPU
            inputs, labels = inputs.cuda(), labels.cuda()

            optimizer.zero_grad()

            # Get the network output
            outputs = net(inputs)

            # Calculate the loss
            loss = criterion(outputs, labels)

            # Back-propagate the loss
            loss.backward()

            # Run the optimizer for one step
            optimizer.step()

            # print statistics
            current_loss += loss.item()
        print ("epoch %d - total loss: %5.4f"%(epoch+1, current_loss))

train_network(net, train_loader, criterion, optimizer, iters=20 )

epoch 1 - total loss: 377.8469
epoch 2 - total loss: 84.9901
epoch 3 - total loss: 65.3377
epoch 4 - total loss: 56.0823
epoch 5 - total loss: 49.8744
epoch 6 - total loss: 45.1537
epoch 7 - total loss: 41.2219
epoch 8 - total loss: 37.7816
epoch 9 - total loss: 35.7738
epoch 10 - total loss: 32.0974
epoch 11 - total loss: 31.8087
epoch 12 - total loss: 28.8356
epoch 13 - total loss: 29.6300
epoch 14 - total loss: 27.2517
epoch 15 - total loss: 24.8246
epoch 16 - total loss: 25.2840
epoch 17 - total loss: 24.6760
epoch 18 - total loss: 22.6854
epoch 19 - total loss: 23.1305
epoch 20 - total loss: 19.9809


Write a function for evaluating the network and measuring the accuracy using a data loader:

In [6]:
def evaluate_net(net, loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            images, labels = images.cuda(), labels.cuda()
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100.0 * correct / total

Evaluate the network and measure the train and test errors!

In [7]:
print("Train error = ", 100 - evaluate_net(net, train_loader))
print("Test error = ", 100 - evaluate_net(net, test_loader))

Train error =  0.7833333333333314
Test error =  1.2199999999999989


#### Does the order of the samples matter? Can we train the network with batches of correlated samples?

Let's sort the data according to their labels.

In [8]:
import numpy as np
# A dirty-hack to sort the samples
idx = np.argsort(train_set.train_labels)
train_set.train_labels = train_set.train_labels[idx]
train_set.train_data = train_set.train_data[idx]
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=False, num_workers=1)


Let's train a network again!

In [9]:
net = Net()
net = net.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

train_network(net, train_loader, criterion, optimizer, iters=10)


epoch 1 - total loss: 2093.9526
epoch 2 - total loss: 2159.8300
epoch 3 - total loss: 2159.8273
epoch 4 - total loss: 2159.8246
epoch 5 - total loss: 2148.9803
epoch 6 - total loss: 2160.8550
epoch 7 - total loss: 2159.9004
epoch 8 - total loss: 2159.8268
epoch 9 - total loss: 2160.4560
epoch 10 - total loss: 2159.8247


In [10]:
print("Train error = ", 100 - evaluate_net(net, train_loader))
print("Test error = ", 100 - evaluate_net(net, test_loader))

Train error =  90.13666666666667
Test error =  90.2


Trying with a smaller learning rate

In [11]:
net = Net()
net = net.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

train_network(net, train_loader, criterion, optimizer, iters=10)


epoch 1 - total loss: 1748.0338
epoch 2 - total loss: 1417.3277
epoch 3 - total loss: 783.4411
epoch 4 - total loss: 289.9167
epoch 5 - total loss: 164.8857
epoch 6 - total loss: 124.0721
epoch 7 - total loss: 103.8610
epoch 8 - total loss: 92.8206
epoch 9 - total loss: 89.1798
epoch 10 - total loss: 83.6032


In [12]:
print("Train error = ", 100 - evaluate_net(net, train_loader))
print("Test error = ", 100 - evaluate_net(net, test_loader))

Train error =  64.20333333333333
Test error =  63.78


In [13]:
train_network(net, train_loader, criterion, optimizer, iters=40)
print("Train error = ", 100 - evaluate_net(net, train_loader))
print("Test error = ", 100 - evaluate_net(net, test_loader))

epoch 1 - total loss: 75.5576
epoch 2 - total loss: 66.1821
epoch 3 - total loss: 61.7868
epoch 4 - total loss: 59.1433
epoch 5 - total loss: 58.0251
epoch 6 - total loss: 52.7888
epoch 7 - total loss: 53.0982
epoch 8 - total loss: 52.6813
epoch 9 - total loss: 51.5730
epoch 10 - total loss: 52.2762
epoch 11 - total loss: 56.4309
epoch 12 - total loss: 51.6152
epoch 13 - total loss: 45.4000
epoch 14 - total loss: 49.6726
epoch 15 - total loss: 40.2434
epoch 16 - total loss: 41.9442
epoch 17 - total loss: 42.3096
epoch 18 - total loss: 43.0505
epoch 19 - total loss: 44.1663
epoch 20 - total loss: 43.9261
epoch 21 - total loss: 42.1905
epoch 22 - total loss: 40.8568
epoch 23 - total loss: 40.6121
epoch 24 - total loss: 39.0650
epoch 25 - total loss: 41.7234
epoch 26 - total loss: 41.4370
epoch 27 - total loss: 36.7540
epoch 28 - total loss: 37.7497
epoch 29 - total loss: 34.4935
epoch 30 - total loss: 40.1772
epoch 31 - total loss: 40.1458
epoch 32 - total loss: 40.9589
epoch 33 - total 

This was just a brief intro to PyTorch. Great tutorials are available at [https://pytorch.org/tutorials](https://pytorch.org/tutorials)