# 1. Pytorch basics

- Used as part of INFO8010 Deep Learning (Gilles Louppe, 2018-2019).
- Originally adapted from [Pytorch tutorial for Deep Learning researchers](https://github.com/yunjey/pytorch-tutorial) (Yunvey Choi, 2018).

---

In [1]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch 
import torchvision
import torch.nn as nn ##functions for NN
import torch.utils.data as data
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# Basic autograd example 1

In [15]:
# Create tensors.
x = torch.tensor(5.)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)

In [16]:
# Build a computational graph.
y = w * x + b    # y = 2 * x + 3

# Compute gradients.
y.backward()

# Print out the gradients.
print(x.grad)    # x.grad = 2 
print(w.grad)    # w.grad = 1 
print(b.grad)    # b.grad = 1 

None
tensor(5.)
tensor(1.)


<div class="alert alert-success">
<b>EXERCISE</b>:

<ul>
    <li>Define a polynomial model <code>y = w1*x + w2*x^2 + b</code>.
    <li>Compute the value of <code>y</code> at <code>x=2</code>, <code>w1=1.5</code>, <code>w2=-1.0</code> and <code>b=3</code>.
    <li>Evaluate the derivate of <code>y</code> with respect to <code>w2</code> at these values.
</ul>

</div>

In [27]:
x = torch.tensor(2.)
w1 = torch.tensor(1.5, requires_grad = True)
w2 = torch.tensor(-1.0, requires_grad = True)
b = torch.tensor(3.)
y = w1*x+w2*x**2+b
y.backward()
print(w2.grad, w1.grad)

##alternative

tensor(4.) tensor(2.)


# Basic autograd example 2

In [37]:
# Create tensors.
x = torch.randn(5, 3)
y = torch.randn(5, 2)

In [38]:
y

tensor([[ 1.4848,  0.6692],
        [ 1.5627,  1.5576],
        [-1.1278,  1.0349],
        [ 1.2540, -0.8508],
        [ 2.2638,  0.3483]])

In [39]:
# Build a linear layer.
linear = nn.Linear(3, 2)
print('w: ', linear.weight)
print('b: ', linear.bias)

w:  Parameter containing:
tensor([[-0.4143,  0.2898, -0.0647],
        [-0.1245, -0.4170, -0.2959]], requires_grad=True)
b:  Parameter containing:
tensor([-0.0780,  0.5190], requires_grad=True)


In [40]:
for p in linear.parameters():
    print(p, p.numel())

Parameter containing:
tensor([[-0.4143,  0.2898, -0.0647],
        [-0.1245, -0.4170, -0.2959]], requires_grad=True) 6
Parameter containing:
tensor([-0.0780,  0.5190], requires_grad=True) 2


In [41]:
# Forward propagation.
pred = linear(x)
print(pred)

tensor([[ 0.2779,  0.2915],
        [-0.4344,  1.1561],
        [ 0.1359, -0.0609],
        [-0.1717,  0.4174],
        [-0.4274,  0.4798]], grad_fn=<AddmmBackward>)


In [42]:
# Build Loss and Optimizer.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(linear.parameters(), lr=0.01)#learning rate

# Compute loss.
loss = criterion(pred, y)
print('loss: ', loss)

loss:  tensor(1.9448, grad_fn=<MseLossBackward>)


In [43]:
# Backpropagation.
loss.backward()

# Print out the gradients.
print ('dL/dw: ', linear.weight.grad) 
print ('dL/db: ', linear.bias.grad)

dL/dw:  tensor([[-0.3494,  0.6689,  0.2049],
        [ 0.1140, -0.1272, -0.0928]])
dL/db:  tensor([-1.2115, -0.0951])


In [44]:
# 1-step Optimization (gradient descent).
optimizer.step()

# You can also do optimization at the low level as shown below.
# linear.weight.data.sub_(0.01 * linear.weight.grad.data)
# linear.bias.data.sub_(0.01 * linear.bias.grad.data)

# Print out the loss after optimization.
pred = linear(x)
loss = criterion(pred, y)
print('loss after 1 step optimization: ', loss.item())

loss after 1 step optimization:  1.923598289489746


<div class="alert alert-success">
<b>EXERCISE</b>:

Write the code above within a for loop that trains the linear models for 100 steps. Check that your loss is decreasing.

</div>

In [53]:
for i in range(100):
    pred = linear(x)
    loss = criterion(pred,y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if i%10 --0:
        print(loss.detach().data)

tensor(1.5207)
tensor(1.5039)
tensor(1.4873)
tensor(1.4711)
tensor(1.4551)
tensor(1.4394)
tensor(1.4239)
tensor(1.4087)
tensor(1.3938)
tensor(1.3647)
tensor(1.3505)
tensor(1.3365)
tensor(1.3228)
tensor(1.3093)
tensor(1.2960)
tensor(1.2830)
tensor(1.2701)
tensor(1.2575)
tensor(1.2329)
tensor(1.2208)
tensor(1.2090)
tensor(1.1974)
tensor(1.1859)
tensor(1.1746)
tensor(1.1636)
tensor(1.1526)
tensor(1.1419)
tensor(1.1209)
tensor(1.1107)
tensor(1.1006)
tensor(1.0907)
tensor(1.0810)
tensor(1.0714)
tensor(1.0619)
tensor(1.0526)
tensor(1.0434)
tensor(1.0255)
tensor(1.0168)
tensor(1.0081)
tensor(0.9997)
tensor(0.9913)
tensor(0.9831)
tensor(0.9750)
tensor(0.9670)
tensor(0.9591)
tensor(0.9437)
tensor(0.9362)
tensor(0.9288)
tensor(0.9215)
tensor(0.9143)
tensor(0.9072)
tensor(0.9003)
tensor(0.8934)
tensor(0.8866)
tensor(0.8733)
tensor(0.8668)
tensor(0.8604)
tensor(0.8541)
tensor(0.8479)
tensor(0.8418)
tensor(0.8357)
tensor(0.8298)
tensor(0.8239)
tensor(0.8124)
tensor(0.8067)
tensor(0.8012)
tensor(0.7

# Load data from numpy

In [54]:
a = np.array([[1,2], [3,4]])
b = torch.from_numpy(a)      # convert numpy array to torch tensor
c = b.numpy()                # convert torch tensor to numpy array

# Implementing the input pipeline

In [55]:
# Download and construct dataset.
train_dataset = dsets.CIFAR10(root='./data/',
                              train=True, 
                              transform=transforms.ToTensor(),
                              download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


170500096it [17:40, 225117.38it/s]                                                                                     

In [None]:
# Select one data pair (read data from disk).
image, label = train_dataset[7]
print(image.size())
print(label)#right answer here

In [None]:
from scipy.misc import toimage
toimage(image)

In [None]:
# Data Loader (this provides queue and thread in a very simple way).##more efficient  way to dwnld data
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100, ##samples for each it
                                           shuffle=True,
                                           num_workers=2)

# When iteration starts, queue and thread start to load dataset from files.
data_iter = iter(train_loader) 

# Mini-batch images and labels.
images, labels = data_iter.next()

# Actual usage of data loader is as below.
for images, labels in train_loader:
    # Your training code will be written here
    pass

# Input pipeline for custom dataset

In [None]:
# You should build custom dataset as below.
class CustomDataset(data.Dataset):
    def __init__(self):
        # TODO
        # 1. Initialize file path or list of file names. 
        pass
    def __getitem__(self, index):
        # TODO
        # 1. Read one data from file (e.g. using numpy.fromfile, PIL.Image.open).
        # 2. Preprocess the data (e.g. torchvision.Transform).
        # 3. Return a data pair (e.g. image and label).
        pass
    def __len__(self):
        # You should change 0 to the total size of your dataset.
        return 0 

# Then, you can just use prebuilt torch's data loader. 
custom_dataset = CustomDataset()
train_loader = torch.utils.data.DataLoader(dataset=custom_dataset,
                                           batch_size=100, 
                                           shuffle=True,
                                           num_workers=2)

# Save and load model

In [None]:
# Save and load the entire model.
torch.save(linear, 'model.pkl')
model = torch.load('model.pkl')

# Save and load only the model parameters(recommended).
torch.save(linear.state_dict(), 'params.pkl')
linear.load_state_dict(torch.load('params.pkl'))