The goal of this notebook is:
1. understand basic tensor operations in PyTorch and the basics of neural networks
2. train a simple neural network using Pytorch.

# What is Pytorch

1. a replacement of numpy
2. a deep learning platform

## Tensors

In [1]:
import torch


construct matrix uninitialized

In [2]:
x = torch.empty(3,5)
print(x)

tensor([[ 0.0000, -0.0000, -0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]])


random matrix

In [3]:
x = torch.rand(5,3)
print(x)

tensor([[0.2864, 0.4463, 0.8009],
        [0.4186, 0.1173, 0.1224],
        [0.2332, 0.3796, 0.3635],
        [0.6057, 0.0508, 0.8237],
        [0.0692, 0.7585, 0.8197]])


In [4]:
x = torch.zeros(5,3, dtype=torch.long)
print(x)
print(type(x))
print(type(x[0][0]))

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
<class 'torch.Tensor'>
<class 'torch.Tensor'>


construct directly from data

In [5]:
# Build the tensor from Python data with the ``torch.tensor`` factory instead
# of the legacy ``torch.Tensor`` constructor. Float literals keep the default
# floating-point dtype, so the printed result is unchanged.
x = torch.tensor([3.0, 4.0, 5.0])
print(x)

tensor([3., 4., 5.])


In [6]:
print(x.size())

torch.Size([3])


construct new tensor based on old one

In [7]:
x = x.new_ones(3,3, dtype=torch.double)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


In [8]:
x = torch.ones_like(x, dtype=torch.int32)
print(x)

tensor([[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]], dtype=torch.int32)


In [9]:
x = torch.randn_like(x, dtype=torch.float32)
print(x)
print(x.size())

tensor([[ 1.0908,  0.3771, -0.5684],
        [ 0.2456,  0.4868,  0.6290],
        [ 1.5616, -2.8548,  0.2614]])
torch.Size([3, 3])


In [10]:
x = torch.rand(3,4,4)
print(x)
print(x.size())

tensor([[[0.5597, 0.3814, 0.3743, 0.5852],
         [0.3242, 0.0196, 0.9332, 0.0485],
         [0.7294, 0.2922, 0.1860, 0.8673],
         [0.2162, 0.3669, 0.0297, 0.5610]],

        [[0.1883, 0.3718, 0.1807, 0.6414],
         [0.9965, 0.5973, 0.1137, 0.3563],
         [0.8269, 0.6626, 0.1067, 0.5239],
         [0.2682, 0.6338, 0.9908, 0.8159]],

        [[0.8661, 0.4879, 0.5275, 0.1209],
         [0.5145, 0.7656, 0.2251, 0.6859],
         [0.3675, 0.6285, 0.5916, 0.6226],
         [0.1560, 0.2315, 0.7378, 0.0149]]])
torch.Size([3, 4, 4])


## operations

In [11]:
x = torch.rand(5,3)
y = torch.ones(5,3)
print(x)
print(y)
print(x+y)

tensor([[0.5210, 0.3038, 0.2678],
        [0.6284, 0.3749, 0.5634],
        [0.1286, 0.7155, 0.3982],
        [0.7393, 0.5615, 0.1526],
        [0.2759, 0.1615, 0.1626]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1.5210, 1.3038, 1.2678],
        [1.6284, 1.3749, 1.5634],
        [1.1286, 1.7155, 1.3982],
        [1.7393, 1.5615, 1.1526],
        [1.2759, 1.1615, 1.1626]])


In [12]:
# or use torch.add
print(torch.add(x,y))

tensor([[1.5210, 1.3038, 1.2678],
        [1.6284, 1.3749, 1.5634],
        [1.1286, 1.7155, 1.3982],
        [1.7393, 1.5615, 1.1526],
        [1.2759, 1.1615, 1.1626]])


output to some variable

In [13]:
result = torch.empty(5,3)
torch.add(x, y, out=result)
print(result)

tensor([[1.5210, 1.3038, 1.2678],
        [1.6284, 1.3749, 1.5634],
        [1.1286, 1.7155, 1.3982],
        [1.7393, 1.5615, 1.1526],
        [1.2759, 1.1615, 1.1626]])


In [14]:
# another way is to add inplace
# Any operation that mutates a tensor in-place is post-fixed with an _. 
# For example: x.copy_(y), x.t_(), will change x.
#y.add_(x)
#print(y)


get part of tensor. similar to numpy

In [15]:
# second column of x
print(x[:, 1])

tensor([0.3038, 0.3749, 0.7155, 0.5615, 0.1615])


reshape the tensor

In [16]:
x = torch.rand(4,4)
y = x.view(1,16)
z = x.view(-1, 2) # -1 means the size will be inferred by other dimensions
print(x)
print(y)
print(z)
print(x.size(), y.size(), z.size())

tensor([[0.2842, 0.7860, 0.5625, 0.4784],
        [0.3343, 0.8163, 0.3659, 0.7559],
        [0.7444, 0.0327, 0.7979, 0.1698],
        [0.9490, 0.9337, 0.4154, 0.1327]])
tensor([[0.2842, 0.7860, 0.5625, 0.4784, 0.3343, 0.8163, 0.3659, 0.7559, 0.7444,
         0.0327, 0.7979, 0.1698, 0.9490, 0.9337, 0.4154, 0.1327]])
tensor([[0.2842, 0.7860],
        [0.5625, 0.4784],
        [0.3343, 0.8163],
        [0.3659, 0.7559],
        [0.7444, 0.0327],
        [0.7979, 0.1698],
        [0.9490, 0.9337],
        [0.4154, 0.1327]])
torch.Size([4, 4]) torch.Size([1, 16]) torch.Size([8, 2])


get the value of tensor

In [17]:
print(x[1][1])
print(x[1][1].item())
# print(x.item()) # only one element of tensor can be converted to Python scalars

tensor(0.8163)
0.8163231015205383


## Numpy bridge

A CPU tensor and the NumPy array created from it share the same underlying memory, so changing one changes the other.

In [18]:
# Use the ``torch.tensor`` factory instead of the legacy ``torch.Tensor``
# constructor. Float literals keep the default floating-point dtype, so the
# tensor (and the NumPy view taken from it next) is unchanged.
x = torch.tensor([1.0, 2.0, 3.0, 4.0])
print(x)

tensor([1., 2., 3., 4.])


In [19]:
y = x.numpy()
print(y)

[1. 2. 3. 4.]


In [20]:
x.add_(1)
print(x)
print(y)

tensor([2., 3., 4., 5.])
[2. 3. 4. 5.]


In [21]:
import numpy as np
y = np.ones(4)
x = torch.from_numpy(y)
print(x)

tensor([1., 1., 1., 1.], dtype=torch.float64)


In [22]:
y = np.add(y, 1) # y is not the same y as before. so y and x are now not the same variable. 
                 # Pay attention to this!!!!!!
print(y)
print(x)

[2. 2. 2. 2.]
tensor([1., 1., 1., 1.], dtype=torch.float64)


In [23]:
# y was rebound to a brand-new array by ``y = np.add(y, 1)`` in the previous
# cell, so it no longer shares memory with x. This in-place add therefore
# changes only y and leaves x untouched.
np.add(y, 1, out=y)
print(y)
print(x)

[3. 3. 3. 3.]
tensor([1., 1., 1., 1.], dtype=torch.float64)


In [24]:
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


## CUDA Tensors

In [25]:
# We will use ``torch.device`` objects to move tensors in and out of GPU
if torch.cuda.is_available():
    device = torch.device('cuda')  # cuda device object
    y = torch.ones(3, 3, device=device)  # create the tensor directly on the GPU
    x = torch.ones(3, 3)  # created on the CPU
    # ``Tensor.to`` is NOT in-place: it returns a new tensor, so the result
    # must be assigned back. Otherwise x stays on the CPU and ``x + y`` fails
    # because the operands live on different devices.
    x = x.to(device)
    z = x + y
    print(z)
    # ``.to`` can change device and dtype in a single call
    print(z.to('cpu', torch.double))
    

# Autograd automatic differentiation

## Tensor

When you set the attribute `.requires_grad` to `True`, PyTorch tracks the operations performed on the tensor, and when you call `.backward()` it computes the gradients automatically. The gradient is stored in the `.grad` attribute, while `.grad_fn` references the function that created the tensor.

To un-track the gradients, use `.detach()`. 

When evaluating, you do not need gradients, so wrap the code in `with torch.no_grad():`

In [26]:
import torch

In [27]:
x = torch.ones(3,3, requires_grad=True)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], requires_grad=True)


In [28]:
y = x + 2
print(y)
print(y.grad_fn)

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]], grad_fn=<AddBackward>)
<AddBackward object at 0x11f9a3940>


In [29]:
z = y*y*3
out = z.mean()
print(z, out)

tensor([[27., 27., 27.],
        [27., 27., 27.],
        [27., 27., 27.]], grad_fn=<MulBackward>) tensor(27., grad_fn=<MeanBackward1>)


In [30]:
a = torch.rand(3,3)
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)

False
True


## Gradient

In [31]:
x = torch.ones(2,3, requires_grad=True)
y = x+2
z = y*y*3
out = z.mean()
out.backward()
print(x.grad)

tensor([[3., 3., 3.],
        [3., 3., 3.]])


In [32]:
print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


# Neural Networks

In this section we will see how to create a neural network using `torch.nn`.

For a typical training procedure, there are probably these steps:
* define the network and initialize the weights
* iterate over a dataset of inputs
* process input through the network
* compute the loss
* propagate gradients back into the network's weights
* update weight

## define the network

First, define a simple convolutional neural network

![alexnet](./mnist.png)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network: two conv/pool stages and three linear layers.

    ``__init__`` declares the layers, ``forward`` wires them together, and
    autograd derives the backward pass, so no explicit ``backward`` is
    written. ``fc1`` takes 16*5*5 features, which is what a 1x32x32 input
    yields after the two 5x5 convolutions and two 2x2 poolings.
    """

    def __init__(self):
        super().__init__()
        # Convolution stages: 1 -> 6 -> 16 channels, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Fully connected head: 16*5*5 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch through the network and return the 10 raw output scores."""
        # conv -> ReLU -> 2x2 max pooling (window given as a tuple)
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # a square pooling window can be given as a single number
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)

        # collapse everything except the batch dimension
        flat = stage2.view(-1, self.num_flat_features(stage2))

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample (all dims except the batch dim)."""
        per_sample_shape = x.size()[1:]
        return per_sample_shape.numel()
    
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


Now let's print the parameters

In [5]:
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's weight

10
torch.Size([6, 1, 5, 5])


let's try a random input.

From the network shown above, we know the input size is 32*32

In [7]:
input_pic = torch.randn(1,1,32,32)
out = net(input_pic)
print(out)

tensor([[-0.1334, -0.0906, -0.0481, -0.0022, -0.1154,  0.0497,  0.0678,  0.0755,
         -0.0473,  0.0222]], grad_fn=<ThAddmmBackward>)


Zero the gradient buffers of all parameters and backprops with random gradients:

In [8]:
net.zero_grad()
out.backward(torch.randn(1,10))

`torch.nn` only supports mini-batches. The entire `torch.nn` package only supports inputs that are a mini-batch of samples, and not a single sample.

For example, `nn.Conv2d` will take in a 4D Tensor of `nSamples x nChannels x Height x Width`.

If you have a single sample, just use `input.unsqueeze(0)` to add a fake batch dimension.

## Loss Function