## Introduction to PyTorch!


PyTorch is a Python framework for deep learning tasks. It was tailored to be fast and Pythonic (yeah!). Its biggest advantage is its ability to automatically calculate gradients for the specified variables. The autograd package provides automatic differentiation for all operations on variables. This is very important in deep learning, as it makes calculating gradients during back-propagation hassle-free.


In [1]:
import torch
import numpy as np
#import time

### Tensor 

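A tensor is an n-dimensional array, much like a NumPy array, that can also live on the GPU. Tensors are created on the CPU by default and are moved to the GPU explicitly (for example with `.cuda()`).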

Types supported:

    64-bit (Float + Int)
    32-bit (Float + Int)
    16-bit (Float + Int)
    8-bit (Signed + Unsigned)

In [2]:
# Allocate a 2x3 tensor. No values are assigned here; x is filled with
# uniform random numbers in a later cell before it is used.
x = torch.Tensor(2, 3)

In [26]:
# Build a 2x3 tensor and fill it in place with samples drawn uniformly
# between -1 and 1 (uniform_ hands back the tensor it just filled).
y = torch.Tensor(2, 3).uniform_(-1, 1)

In [4]:
# Show the six sampled values together with the tensor's type and size.
print(y)


-0.4229 -0.9048  0.2849
 0.8193  0.2569  0.1496
[torch.FloatTensor of size 2x3]



In [6]:
# Add x and y element-wise and store the sum in a pre-allocated tensor.
x.uniform_(-1, 1)  # fill x in place with uniform samples between -1 and 1
result = torch.Tensor(1, 1)  # starts out with the "wrong" shape: 1x1
torch.add(x, y, out=result)
# Notice that `result` is now 2x3. This is not broadcasting: the tensor
# passed as `out=` was resized to fit the 2x3 sum.
# NOTE(review): recent PyTorch releases warn that resizing a non-empty
# `out=` tensor is deprecated -- confirm against the version in use.


-0.9663 -0.3793  0.5610
 1.0834 -0.4627 -0.4212
[torch.FloatTensor of size 2x3]

In [7]:
# Scale every element of y by 2.0; this builds a new tensor and leaves y as is.
print(y * 2.0)


-0.8458 -1.8095  0.5697
 1.6387  0.5139  0.2991
[torch.FloatTensor of size 2x3]



In [8]:
# In-place operations: note the trailing '_' in the method name. add_ writes
# the sum back into y rather than allocating a new tensor for the result.
# Here y is added to itself, so every element of y is doubled.
y.add_(y)


-0.8458 -1.8095  0.5697
 1.6387  0.5139  0.2991
[torch.FloatTensor of size 2x3]

In [9]:
# Convert the tensor to a NumPy array.
# NOTE: per the PyTorch docs, for a CPU tensor the returned array shares the
# tensor's memory, so an in-place change to one is visible in the other.
nampy = y.numpy()
print(nampy)

[[-0.84581065 -1.8095355   0.5697179 ]
 [ 1.638674    0.5138924   0.29911923]]


In [28]:
# Move the result of an operation to the GPU when one is available.
# The sum is computed first so that `u` is always defined: on a machine
# without CUDA it simply stays on the CPU instead of the print below
# failing with a NameError.
u = y + y
if torch.cuda.is_available():
    u = u.cuda()
print(u)  # on a CUDA machine, notice "(GPU 0)" at the end of the output.


-1.8789 -2.4602  0.9873
-2.8663  3.3645  0.5312
[torch.cuda.FloatTensor of size 2x3 (GPU 0)]



### Variable
__autograd.Variable__ is the central class of the autograd package. It wraps a Tensor and supports nearly all of the operations defined on it. Once you finish your computation, you can call __.backward()__ and have all the gradients computed automatically.
![alt text](http://pytorch.org/tutorials/_images/Variable.png "Variable Structure")

You can access the raw tensor through the __.data__ attribute, while the gradient w.r.t. this variable is accumulated into __.grad__.

In [15]:
# import Variable from pytorch.
from torch.autograd import Variable

In [16]:
# Dot product of two vectors, tracked by autograd.
#
# The tensors are created on the GPU when CUDA is available and on the CPU
# otherwise, so the example also runs on machines without a GPU. They are
# built on the target device up front (rather than moved afterwards with
# .cuda()) so that y itself remains the Variable that receives the gradient.
FloatTensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

x = Variable(FloatTensor([10, 10]))
y = Variable(FloatTensor([5, 0]), requires_grad=True)  # gradients are wanted w.r.t. y only

# z = sum_i x_i * y_i^2 = 10*25 + 10*0 = 250
z = x.dot(y * y)

In [17]:
# Back-propagate from z to compute dz/dy automatically. retain_graph=True
# keeps the graph around because a later cell calls z.backward() again.
z.backward(retain_graph=True)
print(f'value of z : {z.data}')

value of z : 
 250
[torch.cuda.FloatTensor of size 1 (GPU 0)]



In [18]:
# Gradient of z with respect to y: dz/dy_i = 2 * x_i * y_i -> [100, 0].
y.grad.data


 100
   0
[torch.cuda.FloatTensor of size 2 (GPU 0)]

In [19]:
# Run the backward pass a second time on the same graph.
z.backward()

In [20]:
y.grad.data
# The gradient shown here ([200, 0]) is twice the true value ([100, 0]).
# backward() accumulates into .grad: the gradients from the second pass were
# added to the ones left over from the first pass.
# Resetting the gradients (not the weights) to zero after each pass, e.g.
# with y.grad.data.zero_(), solves the issue.


 200
   0
[torch.cuda.FloatTensor of size 2 (GPU 0)]

In [21]:
# Gradient accumulation in a single cell: two backward passes over one graph.
# Tensors go on the GPU when CUDA is available and on the CPU otherwise.
FloatTensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

x = Variable(FloatTensor([10, 10]))
y = Variable(FloatTensor([5, 0]), requires_grad=True)
z = x.dot(y * y)

z.backward(retain_graph=True)  # keep the graph so backward() can run again
print(f'Gradients form the first run : {y.grad.data}')

# Uncomment the line below to reset the accumulated gradients before the
# second pass; the second print then shows [100, 0] instead of [200, 0].
#y.grad.data.zero_() # gradients --> 0

z.backward()
# This is the second pass, so label it as such (it used to say "first run").
print(f'Gradients form the second run : {y.grad.data}')

Gradients form the first run : 
 100
   0
[torch.cuda.FloatTensor of size 2 (GPU 0)]

Gradients form the first run : 
 200
   0
[torch.cuda.FloatTensor of size 2 (GPU 0)]



In [22]:
import torch
from torch.autograd import Variable

# Toy data set: every target is exactly twice its input.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# The single trainable weight; 1.0 is an arbitrary starting value.
w = Variable(torch.Tensor([1.0]), requires_grad=True)


def forward(x):
    """Model prediction for input x: the linear map x * w."""
    prediction = x * w
    return prediction


def loss(x, y):
    """Squared error between the model's prediction for x and the target y."""
    residual = forward(x) - y
    return residual * residual


# Before training
print("predict (before training)", 4, forward(4).data[0])

# Training loop: one gradient-descent step per sample, 10 passes over the data.
for epoch in range(10):
    for x_val, y_val in zip(x_data, y_data):
        l = loss(x_val, y_val)
        l.backward()  # fills w.grad with d(loss)/dw for this sample
        print("\tgrad: ", x_val, y_val, w.grad.data[0])

        # Step against the gradient with a learning rate of 0.01.
        w.data = w.data - 0.01 * w.grad.data

        # Gradients accumulate across backward() calls, so clear them by hand.
        w.grad.data.zero_()

    # Report the loss of the last sample seen in this epoch.
    print("progress:", epoch, l.data[0])

# After training
print("predict (after training)", 4, forward(4).data[0])

predict (before training) 4 4.0
	grad:  1.0 2.0 -2.0
	grad:  2.0 4.0 -7.840000152587891
	grad:  3.0 6.0 -16.228801727294922
progress: 0 7.315943717956543
	grad:  1.0 2.0 -1.478623867034912
	grad:  2.0 4.0 -5.796205520629883
	grad:  3.0 6.0 -11.998146057128906
progress: 1 3.9987640380859375
	grad:  1.0 2.0 -1.0931644439697266
	grad:  2.0 4.0 -4.285204887390137
	grad:  3.0 6.0 -8.870372772216797
progress: 2 2.1856532096862793
	grad:  1.0 2.0 -0.8081896305084229
	grad:  2.0 4.0 -3.1681032180786133
	grad:  3.0 6.0 -6.557973861694336
progress: 3 1.1946394443511963
	grad:  1.0 2.0 -0.5975041389465332
	grad:  2.0 4.0 -2.3422164916992188
	grad:  3.0 6.0 -4.848389625549316
progress: 4 0.6529689431190491
	grad:  1.0 2.0 -0.4417421817779541
	grad:  2.0 4.0 -1.7316293716430664
	grad:  3.0 6.0 -3.58447265625
progress: 5 0.35690122842788696
	grad:  1.0 2.0 -0.3265852928161621
	grad:  2.0 4.0 -1.2802143096923828
	grad:  3.0 6.0 -2.650045394897461
progress: 6 0.195076122879982
	grad:  1.0 2.0 -0.24144

In [23]:

# Toy data set: every target is exactly twice its input.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# The single trainable weight; 1.0 is an arbitrary starting value.
w = Variable(torch.Tensor([1.0]), requires_grad=True)


def forward(x):
    """Model prediction for input x: the linear map x * w."""
    prediction = x * w
    return prediction


def loss(x, y):
    """Squared error between the model's prediction for x and the target y."""
    residual = forward(x) - y
    return residual * residual


# Before training
print("predict (before training)", 4, forward(4).data[0])

# Training loop: one gradient-descent step per sample, 10 passes over the data.
for epoch in range(10):
    for x_val, y_val in zip(x_data, y_data):
        l = loss(x_val, y_val)
        l.backward()  # fills w.grad with d(loss)/dw for this sample
        print("\tgrad: ", x_val, y_val, w.grad.data[0])

        # Step against the gradient with a learning rate of 0.01.
        w.data = w.data - 0.01 * w.grad.data

        # Gradients accumulate across backward() calls, so clear them by hand.
        w.grad.data.zero_()

    # Report the loss of the last sample seen in this epoch.
    print("progress:", epoch, l.data[0])

# After training
print("predict (after training)", 4, forward(4).data[0])

predict (before training) 4 4.0
	grad:  1.0 2.0 -2.0
	grad:  2.0 4.0 -7.840000152587891
	grad:  3.0 6.0 -16.228801727294922
progress: 0 7.315943717956543
	grad:  1.0 2.0 -1.478623867034912
	grad:  2.0 4.0 -5.796205520629883
	grad:  3.0 6.0 -11.998146057128906
progress: 1 3.9987640380859375
	grad:  1.0 2.0 -1.0931644439697266
	grad:  2.0 4.0 -4.285204887390137
	grad:  3.0 6.0 -8.870372772216797
progress: 2 2.1856532096862793
	grad:  1.0 2.0 -0.8081896305084229
	grad:  2.0 4.0 -3.1681032180786133
	grad:  3.0 6.0 -6.557973861694336
progress: 3 1.1946394443511963
	grad:  1.0 2.0 -0.5975041389465332
	grad:  2.0 4.0 -2.3422164916992188
	grad:  3.0 6.0 -4.848389625549316
progress: 4 0.6529689431190491
	grad:  1.0 2.0 -0.4417421817779541
	grad:  2.0 4.0 -1.7316293716430664
	grad:  3.0 6.0 -3.58447265625
progress: 5 0.35690122842788696
	grad:  1.0 2.0 -0.3265852928161621
	grad:  2.0 4.0 -1.2802143096923828
	grad:  3.0 6.0 -2.650045394897461
progress: 6 0.195076122879982
	grad:  1.0 2.0 -0.24144

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters for the MNIST example
input_size = 784       # presumably 28 * 28 flattened pixels -- TODO confirm
hidden_size = 500
num_classes = 10       # matches the 10 outputs of Net's final layer
num_epochs = 5
batch_size = 100       # batch size handed to both DataLoaders
learning_rate = 0.001

In [25]:
# MNIST data sets, stored under ./data/. Images are converted to tensors on
# load; the training split is created with download=True, which asks
# torchvision to fetch the files.
train_dataset = datasets.MNIST(
    root='./data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data/',
    train=False,
    transform=transforms.ToTensor(),
)

# Input pipeline: batches of `batch_size`; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)


NameError: name 'datasets' is not defined

In [None]:
class Net(nn.Module):
    """Small convolutional classifier for 1-channel 28x28 images (e.g. MNIST).

    Two conv -> max-pool -> ReLU stages followed by one fully connected layer
    that produces 10 class scores.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.mp = nn.MaxPool2d(2)
        # 320 = 20 channels * 4 * 4: a 28x28 input shrinks 28 -> 24 -> 12 -> 8 -> 4.
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 10) for a (batch, 1, 28, 28) input."""
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))
        x = F.relu(self.mp(self.conv2(x)))
        x = x.view(in_size, -1)  # flatten the tensor
        x = self.fc(x)
        # The return must sit inside forward(); dim=1 normalises over the
        # class axis instead of relying on the deprecated implicit dimension.
        return F.log_softmax(x, dim=1)

In [None]:
def train(epoch):
    """Train the global `model` for one pass over `train_loader`.

    Reads the module-level `model`, `optimizer` and `train_loader`. Every
    10th batch a progress line with the current loss is printed.

    Args:
        epoch: epoch number, used only in the progress message.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()  # clear the gradients left by the previous step
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            # PyTorch >= 0.4 returns a 0-dim loss, for which `loss.data[0]`
            # raises IndexError; `.item()` is the replacement. Fall back to
            # the old indexing on versions that predate `.item()`.
            loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss_value))


In [None]:
def test():
    """Evaluate the global `model` on `test_loader`.

    Reads the module-level `model` and `test_loader`, then prints the average
    negative log-likelihood loss per sample and the classification accuracy.
    """
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        # NOTE(review): `volatile=True` has no effect on PyTorch >= 0.4, where
        # `torch.no_grad()` replaced it -- confirm against the version in use.
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        # sum up batch loss
        batch_loss = F.nll_loss(output, target, size_average=False)
        # A 0-dim loss (PyTorch >= 0.4) cannot be indexed with [0]; use
        # `.item()` when it exists and the old indexing otherwise.
        test_loss += batch_loss.item() if hasattr(batch_loss, 'item') else batch_loss.data[0]
        # get the index of the max log-probability
        pred = output.data.max(1, keepdim=True)[1]
        # int() keeps `correct` a plain Python number even when .sum()
        # returns a tensor, so the summary line prints a clean count.
        correct += int(pred.eq(target.data.view_as(pred)).cpu().sum())

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))

In [None]:
# train() and test() read the module-level `model` and `optimizer`, so both
# have to exist before the first epoch starts.
model = Net()
# Plain SGD with the learning rate declared with the other hyper-parameters.
# NOTE(review): the optimiser choice is an assumption -- swap in the one you intend.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Nine epochs (1..9): train on the full training set, then evaluate.
for epoch in range(1, 10):
    train(epoch)
    test()