# Introduction to PyTorch
This notebook introduces and demonstrates the basic functionality of PyTorch.

In [3]:
import torch
import numpy as np
print(torch.__version__)

0.4.0


### Tensors
PyTorch tensors are the base variable used for computation, and can the dimension of most classic variables (scalar, vector, matrix). They are similar in use to Numpy ndarrays.

torch.tensor(data) creates tensor from data
torch.* creates a tensor with given initialization (empty, zeros, ones, random, etc)
torch.*_like(input) creates a tensor with a given initialization, but copies the properties of the input (size, dtype, etc)

see https://pytorch.org/docs/master/torch.html#tensor-creation-ops

In [4]:
# initialize an empty matrix
x = torch.empty(5,3)

# randomly initialize a vector
x = torch.rand(4)

# zero initialize a matrix of datatrype long
x = torch.zeros(3,3,dtype=torch.long)

# construct a tensor from data
x = torch.tensor([[5.5, 3], [6, 2.1]])

# create a copy of an existing tensor
y = torch.zeros_like(x)

# find the size
y.size();

### Operations
There are multiple syntaxes for operations.
see all operations here https://pytorch.org/docs/stable/torch.html

In [5]:
x = torch.rand(5,3)
y = torch.rand(5,3)

# addition
z = x + y

z = torch.add(x,y)

z = torch.empty_like(x) # provides output tensor
torch.add(x, y, out=z)

y.add_(x); # adds inplace (adds x to y)

### Working with numpy
Most PyTorch operations are basically the same as the Numpy equivalent, and it's easy to convert variables between the two.

In [6]:
# convert from torch to numpy
x = torch.ones(5)
y = x.numpy() # note that they now share memory (changing one changes the other)

# convert from numpy to torch
x = np.ones(5)
y = torch.from_numpy(x) # again, they share the same memory

### Working with gradients
The autograd package provides automatic differentiation for all tensor operations.

The *requires_grad* flag for a tensor indicates that it will be part backpropagation. All results of calculations involving the initial tensor will have the same flag.

The *.grad* attribute stores the gradients calculated during backpropagation

*X.backward()* backpropagates from variable X. If X is non-scalar, then you need to specify gradients (X.backward(gradients)) 

see documentation here: https://pytorch.org/docs/stable/autograd.html

In [7]:
x = torch.ones(2, 2, requires_grad=True)
y = x**2

# the grad_fn attribute tracks which gradient operation will be needed
print(y.grad_fn)

z = y.mean()
print(z.grad_fn)

# backpropagate
z.backward()

# print the gradients
print(x.grad)

<PowBackward0 object at 0x7f86e028cc50>
<MeanBackward1 object at 0x7f86e028cc50>
tensor([[ 0.5000,  0.5000],
        [ 0.5000,  0.5000]])


## Neural Networks
The neural network package torch.nn depends on autograd to define models and differentiate them. nn.Module defines the layers of the net, and the method forward() performs forward propagation.

As an example, let's work on the MNIST dataset using a convnet.
<img src="mnist.png">

https://pytorch.org/docs/stable/nn.html?highlight=nn%20conv2d#torch.nn.Conv2d

https://pytorch.org/docs/stable/nn.html?highlight=nn%20linear#torch.nn.Linear

* Step 1: initialize the weights
* Step 2: define forward propagation (backward propagation is done automatically by autograd)

In [10]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        
        # declare the kernels for the two convolution operations
        ## convolution 1: 1 32x32 input image, 6 output images, 5x5 kernel
        ## convolution 2: 6 14x14 input images, 16 output images, 5x5 kernel
        ## nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        # declare the weights for the fully-connected layers
        ## nn.Linear(in_features, out_features, bias=True)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    # define forward propagation
    def forward(self, x):
        # convolution 1
        x = F.relu(self.conv1(x))
        # maxpool 1 (2x2 window)
        x = F.max_pool2d(x, (2,2))
        # convolution 2
        x = F.relu(self.conv2(x))
        # maxpool 2 (2x2 window)
        x = F.max_pool2d(x, 2)
        # reshape the data into a vector
        x = x.view(-1, self.n_flat_features(x))
        # fully-connected 1
        x = F.relu(self.fc1(x))
        # fully-connected 2
        x = F.relu(self.fc2(x))
        # output
        return self.fc3(x)
    
    # calculates the total number of features
    def n_flat_features(self, x):
        sz = x.size()[1:]
        n_features = 1
        for s in sz:
            n_features *= s
        return n_features

In [17]:
# create an instance of the net
net = Net()
print(net)

# try a random 32x32 input
# torch.nn only accepts minibatch inputs (4D vectors)
# size should be nSamples x nChannels x height x width
data_in = torch.randn(1, 1, 32, 32)
out = net(data_in)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


### Defining the loss function and backpropagating
Calculate how close the neural net's output is to the desired value/classification
torch.nn has a number of loss functions.

In [18]:
# for example, use mean squared error loss
target = torch.range(1,10)  # a dummy target
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()

loss = criterion(out, target)
print(loss)

# zero the gradients
net.zero_grad()
# backprop with random gradients
loss.backward()

tensor(38.6260)


### Update the weights

In [19]:
learning_rate = 0.01
# update each of the parameters: weight = weight - learning_rate * gradient 
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [20]:
# # for better gradient descent implementations, use torch.optim
# import torch.optim as optim

# # create your optimizer
# optimizer = optim.SGD(net.parameters(), lr=0.01)

# # in your training loop:
# optimizer.zero_grad()   # zero the gradient buffers
# output = net(input)
# loss = criterion(output, target)
# loss.backward()
# optimizer.step()    # Does the update

# Training a classifier (full example)
Using the CIFAR10 dataset, which has images of size 3 x 32 x 32 labelled as classes ‘airplane’, ‘automobile’, ‘bird’, ‘cat’, ‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’.

For list of available datasets, see https://pytorch.org/docs/master/torchvision/datasets.html

In [21]:
import torchvision # contains dataloaders for common datasets
import torchvision.transforms as transforms

In [22]:
# import the CIFAR10 dataset
## imported images are PIL images of range [0,1], convert to tensor of normalized range [-1,1]
## transforms.Normalize(mean, std_dev)
T = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=T)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=T)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
Files already downloaded and verified


In [23]:
import torch.nn as nn
import torch.nn.functional as F

# copy above network (comments removed)
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    # define forward propagation
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, (2,2))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 16*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
    
net = Net()

# use cross-entropy loss and SGD with momentum
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(4):
    running_loss = 0.0
    
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        
        # zero gradients
        optimizer.zero_grad()
        
        # forwardprop
        outputs = net(inputs)
        # calculate loss
        loss = criterion(outputs, labels)
        # backprop
        loss.backward()
        # optimize
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Training complete.')

[1,  2000] loss: 2.203
[1,  4000] loss: 1.858
[1,  6000] loss: 1.684
[1,  8000] loss: 1.611
[1, 10000] loss: 1.536
[1, 12000] loss: 1.464
[2,  2000] loss: 1.418
[2,  4000] loss: 1.375
[2,  6000] loss: 1.338
[2,  8000] loss: 1.323
[2, 10000] loss: 1.305
[2, 12000] loss: 1.274
[3,  2000] loss: 1.199
[3,  4000] loss: 1.206
[3,  6000] loss: 1.197
[3,  8000] loss: 1.192
[3, 10000] loss: 1.163
[3, 12000] loss: 1.155
[4,  2000] loss: 1.087
[4,  4000] loss: 1.087
[4,  6000] loss: 1.084
[4,  8000] loss: 1.077
[4, 10000] loss: 1.096
[4, 12000] loss: 1.092
Training complete.


In [24]:
dataiter = iter(testloader)
images, labels = dataiter.next()

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

NameError: name 'imshow' is not defined

In [30]:
output = net(images)

_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
                              for j in range(4)))

Predicted:  truck  frog truck  ship


In [31]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 60 %


In [32]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 66 %
Accuracy of   car : 69 %
Accuracy of  bird : 45 %
Accuracy of   cat : 32 %
Accuracy of  deer : 61 %
Accuracy of   dog : 55 %
Accuracy of  frog : 75 %
Accuracy of horse : 53 %
Accuracy of  ship : 73 %
Accuracy of truck : 72 %
