# Goal of this tutorial
- Understand PyTorch's Tensor library and neural networks at a high level.
- Train a small neural network to classify images.

# PyTorch is a Python-based scientific computing package targeted at two sets of audiences:
- A replacement for NumPy to use the power of GPUs.
- A deep learning research platform that provides maximum flexibility and speed.


# Basic Tensor Operation

In [1]:
import torch


In [2]:
# construct a 5*3 matrix, uninitialized
x = torch.empty(5, 3)
print(x)

tensor([[7.3387e-27, 4.5743e-41, 7.3387e-27],
        [4.5743e-41, 3.9918e-34, 4.5743e-41],
        [3.9919e-34, 4.5743e-41, 4.1364e-34],
        [4.5743e-41, 4.1597e-34, 4.5743e-41],
        [3.9915e-34, 4.5743e-41, 3.9958e-34]])


In [3]:
# construct a randomly initialized matrix
x = torch.rand(5, 3)
print(x)

tensor([[0.5982, 0.3522, 0.9689],
        [0.1569, 0.4624, 0.4761],
        [0.2874, 0.4432, 0.3276],
        [0.6214, 0.4713, 0.2380],
        [0.0552, 0.3525, 0.8995]])


In [4]:
# construct a matrix filled with zeros and of dtype long
x = torch.zeros(5, 3, dtype=torch.long)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [5]:
# construct a tensor directly from data
x = torch.tensor([5.5, 3])
x

tensor([5.5000, 3.0000])

In [7]:
# create a tensor based on an existing tensor. These methods will reuse properties of the input tensor,
# e.g. dtype, unless new values are provided by the user.

x = x.new_ones(5, 3, dtype=torch.double)
print(x)

x = torch.rand_like(x, dtype=torch.float)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[0.7446, 0.6253, 0.2510],
        [0.1328, 0.6675, 0.6332],
        [0.5705, 0.4489, 0.8179],
        [0.6915, 0.8446, 0.2655],
        [0.1277, 0.4293, 0.5286]])


In [12]:
print(x.size())

# torch.Size is in fact a tuple, so it supports all tuple operations.


torch.Size([5, 3])


In [13]:
y = torch.rand(5, 3)
print(x + y)

tensor([[0.8961, 1.2750, 1.1665],
        [0.2165, 1.1885, 0.9298],
        [0.9285, 1.1591, 1.2062],
        [0.8279, 0.9990, 1.1274],
        [0.5562, 0.8172, 0.8933]])


In [14]:
print(torch.add(x, y))

tensor([[0.8961, 1.2750, 1.1665],
        [0.2165, 1.1885, 0.9298],
        [0.9285, 1.1591, 1.2062],
        [0.8279, 0.9990, 1.1274],
        [0.5562, 0.8172, 0.8933]])


In [15]:
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)

tensor([[0.8961, 1.2750, 1.1665],
        [0.2165, 1.1885, 0.9298],
        [0.9285, 1.1591, 1.2062],
        [0.8279, 0.9990, 1.1274],
        [0.5562, 0.8172, 0.8933]])


In [16]:
# any operation that mutates a tensor in-place is post-fixed with an _. 
# For example: x.copy_(y), x.t_(), will change x
y.add_(x)
print(y)

tensor([[0.8961, 1.2750, 1.1665],
        [0.2165, 1.1885, 0.9298],
        [0.9285, 1.1591, 1.2062],
        [0.8279, 0.9990, 1.1274],
        [0.5562, 0.8172, 0.8933]])


In [19]:
# We can use standard NumPy-like indexing with all the bells and whistles!
print(x[:, 1])

tensor([0.6253, 0.6675, 0.4489, 0.8446, 0.4293])


In [21]:
# Resizing. 
x = torch.randn(4,4)
y = x.view(16)
z = x.view(-1, 8) # the size of -1 is inferred from other dimensions
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [23]:
x = torch.randn(1)
print(x)
print(x.item())

tensor([0.6097])
0.6096804738044739


In [29]:
# converting a torch tensor to a numpy array
# the torch tensor and numpy array will share their underlying memory locations, and changing
# one will change the other. 

a = torch.ones(5)
print("Tensor:",a)

b = a.numpy()
print("Numpy Array: ",b)

a.add_(1)
print(a)
print(b)


Tensor: tensor([1., 1., 1., 1., 1.])
Numpy Array:  [1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [30]:
# converting numpy array to torch tensor
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


# Autograd

In [33]:
# create a tensor and set requires_grad=True to track computation with it.
x = torch.ones(2, 2, requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [37]:
# tensor operation
y = x + 2
print(y)
print(y.grad_fn)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward>)
<AddBackward object at 0x7f82b5505e10>


In [40]:
z = y * y * 3
out = z.mean()
print(z)
print(out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward>)
tensor(27., grad_fn=<MeanBackward1>)


In [45]:
a = torch.randn(2, 2)
a = ((a * 3) / (a - 1))
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7f82b54e1470>


In [47]:
# Gradients
out.backward()

In [48]:
# print gradients d(out)/dx
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [57]:
# We can do many crazy things with autograd!
# Here y is doubled until its norm reaches 1000. Every doubling is an
# ordinary tensor operation on y, so y still carries a grad_fn afterwards.
x = torch.randn(3, requires_grad=True)

y = x * 2

# The norm is only needed to decide when to stop, so it is taken on a
# detached view of y. detach() is used instead of .data because .data
# bypasses autograd's tracking of the tensor.
while y.detach().norm() < 1000:
    y = y * 2

print(y)

tensor([ -390.5672, -1009.0579,  -651.2288], grad_fn=<MulBackward>)


In [58]:
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(gradients)

print(x.grad)

tensor([ 102.4000, 1024.0000,    0.1024])


In [59]:
print(x.requires_grad)
print((x**2).requires_grad)

with torch.no_grad():
    print((x**2).requires_grad)

True
True
False


## Neural Network

In [75]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network producing 10 output scores per sample.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. The first linear layer expects
    16 * 5 * 5 features per sample, which is what a 1-channel 32x32 input
    is reduced to by the two convolution/pooling stages.
    """

    def __init__(self):
        super().__init__()

        # Convolution kernels: 1 input image channel -> 6 output channels,
        # then 6 -> 16, both with a 5x5 square kernel.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Fully connected layers: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a mini-batch ``x`` and return its scores.

        Only the forward pass has to be written; the backward pass, where
        the gradients are computed, is derived automatically by autograd.
        """
        # First stage: convolution, ReLU, max pooling over a (2, 2) window.
        activated = F.relu(self.conv1(x))
        x = F.max_pool2d(activated, (2, 2))

        # Second stage: for a square window a single number is enough.
        activated = F.relu(self.conv2(x))
        x = F.max_pool2d(activated, 2)

        # Flatten each sample into one row for the linear layers.
        flat_width = self.num_flat_features(x)
        x = x.view(-1, flat_width)

        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return how many values each sample of ``x`` holds.

        This is the product of every dimension size except the first
        (batch) dimension.
        """
        per_sample_shape = x.size()[1:]
        return per_sample_shape.numel()

net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [76]:
# learnable parameters of a model
params = list(net.parameters())
print(len(params))
print(params[0].size())

10
torch.Size([6, 1, 5, 5])


In [77]:
# torch.nn only supports mini-batches. The entire torch.nn package only supports inputs that are a 
# mini-batch of samples, and not a single sample. For a single sample, add a fake batch dimension
# with input.unsqueeze(0).

input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0878,  0.0156,  0.0493, -0.0649,  0.0512, -0.1083,  0.0511, -0.0369,
         -0.0200,  0.0198]], grad_fn=<ThAddmmBackward>)


In [78]:
net.zero_grad()
out.backward(torch.randn(1, 10))

In [79]:
# loss function

input = torch.randn(1, 1, 32, 32)
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)


tensor(0.5945, grad_fn=<MseLossBackward>)


In [81]:
# When we call loss.backward(), the loss is differentiated w.r.t. every Tensor in the graph that has
# requires_grad=True (here, the network's parameters), and the gradient is accumulated into that
# Tensor's .grad attribute.

net.zero_grad() # zeroes the gradient buffers of all parameters.

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv2.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv2.bias.grad after backward
tensor([ 0.0011, -0.0035,  0.0047, -0.0008,  0.0017,  0.0098])


In [82]:
# update the weights
# simplest update using SGD: weight = weight - learning_rate * gradient
#
# The update is done in place under torch.no_grad() so that autograd does not
# record it as part of the graph. This replaces mutating the parameters
# through .data, which bypasses autograd's tracking of the tensor.

learning_rate = 0.01
with torch.no_grad():
    for param in net.parameters():
        param.sub_(param.grad * learning_rate)

In [85]:
# update the weights with an optimizer from torch.optim, which implements update rules such as SGD, Adam and RMSProp.
import torch.optim as optim

# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in training loop:
optimizer.zero_grad()
output = net(input)

loss = criterion(output, target)
loss.backward()
optimizer.step() # Does the update