# PyTorch tutorial

In [1]:
import numpy as np
import torch

## Tensors

In [3]:
data = [[1, 2],[3, 4]]
x_data = torch.tensor(data)

In [7]:
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

In [8]:
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.3845, 0.8900],
        [0.2554, 0.9199]]) 



In [9]:
shape = (2,3)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.2354, 0.9639, 0.1382],
        [0.5726, 0.1465, 0.3805]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [10]:
tensor = torch.rand(3,4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [20]:
# linear algebta functions
torch.eig(tensor.matmul(tensor.T), eigenvectors=True)

torch.return_types.eig(
eigenvalues=tensor([[4.3318, 0.0000],
        [0.4857, 0.0000],
        [0.2914, 0.0000]]),
eigenvectors=tensor([[-0.6660, -0.6922, -0.2781],
        [-0.5465,  0.7065, -0.4497],
        [-0.5077,  0.1475,  0.8488]]))

## Autograd

In [9]:
# create dummy tensors
a = torch.tensor([2., 3.], requires_grad=True)
b = torch.tensor([6., 4.], requires_grad=True)

In [10]:
# output tensor
q = 3*a**2 + b
r = q * a

In [11]:
r

tensor([36., 93.], grad_fn=<MulBackward0>)

In [4]:
# compute gradients
r.backward(gradient=torch.ones_like(q))

In [5]:
a.grad

tensor([12., 18.])

In [8]:
q.requires_grad

True

In [7]:
b.grad

tensor([1., 1.])

In [33]:
# traverse the DAG
q.grad_fn.next_functions[0][0].next_functions

((<PowBackward0 at 0x7f5f963d7828>, 0), (None, 0))

## Neural nets

In [34]:
import torch.nn as nn
import torch.nn.functional as F

In [63]:
# create a ConvNet
class ConvNet(nn.Module):
    
    def __init__(self):
        super(ConvNet, self).__init__()
        # conv layers
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # fc layers
        self.fc1 = nn.Linear(16*6*6, 120)
        self.fc2 = nn.Linear(120, 64)
        self.fc3 = nn.Linear(64, 10)
        
    def forward(self, x):
        # apply convolutions and downsampling
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # flatten input tensor
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
        
    def num_flat_features(self, x):
        shape = x.size()[1:] # don't count the batch dimension
        n_feat = 1
        for d in shape:
            n_feat *= d
        return n_feat


In [64]:
cn = ConvNet()
print(cn)

ConvNet(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)


In [65]:
# learnable params
for p in cn.parameters():
    print(p.size())

torch.Size([6, 1, 3, 3])
torch.Size([6])
torch.Size([16, 6, 3, 3])
torch.Size([16])
torch.Size([120, 576])
torch.Size([120])
torch.Size([64, 120])
torch.Size([64])
torch.Size([10, 64])
torch.Size([10])


In [67]:
# random input batch
data = torch.randn(4, 1, 32, 32)
y = cn(data)
print(y)

tensor([[-0.0160,  0.0211,  0.0481,  0.0145,  0.0476, -0.0901,  0.1012,  0.0534,
          0.1393, -0.0195],
        [-0.0133,  0.0203,  0.0451, -0.0230,  0.0440, -0.1155,  0.1108,  0.0148,
          0.1849, -0.0382],
        [-0.0049,  0.0212,  0.0854, -0.0192,  0.0530, -0.1178,  0.0923,  0.0362,
          0.1653, -0.0411],
        [-0.0149, -0.0164,  0.0443, -0.0172,  0.0585, -0.0874,  0.0731,  0.0426,
          0.1373, -0.0412]], grad_fn=<AddmmBackward>)


In [69]:
# backprop
cn.zero_grad()
y.backward(gradient=torch.randn(4, 10))

In [92]:
# inspect computed parameter gradients
cn_params = list(cn.parameters())
len(cn_params)

10

In [93]:
cn_params[3].grad

tensor([-0.0100, -0.0890,  0.0338,  0.0380, -0.0360, -0.3052, -0.1569,  0.0819,
        -0.2681, -0.0612,  0.0151, -0.0623, -0.1875, -0.0663,  0.0347, -0.1055])

### Loss function

In [None]:
# dummy input and targets
data = torch.randn(4, 1, 32, 32)
pred = cn(data)
target = torch.randn_like(pred)

In [96]:
target

tensor([[-0.7002,  1.4774,  0.3304, -0.2408, -0.6467, -1.4701,  0.5988, -0.8106,
         -0.8798,  0.1577],
        [ 0.5366,  0.9574, -0.0164, -0.5578,  0.1467, -0.2498, -1.2092,  1.2324,
          0.6097, -1.1467],
        [-0.4037,  0.3217, -1.3192,  1.9048, -0.5673, -1.0156,  0.7784, -2.4793,
         -0.7999, -0.2350],
        [ 1.4927, -0.3113,  1.0824,  0.1498,  1.4842, -0.3042, -0.2295, -0.3804,
          2.1648,  0.6430]])

In [97]:
# loss function
mse = nn.MSELoss()
loss = mse(pred, target)
loss

tensor(0.9649, grad_fn=<MseLossBackward>)

In [98]:
# compute gradients
cn.zero_grad()     # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(cn.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(cn.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0034, -0.0019, -0.0026, -0.0022, -0.0062, -0.0106])


### Backprop

In [109]:
import torch.optim as optim

In [104]:
# inspect computed parameter gradients
cn_params = list(cn.parameters())
len(cn_params)

10

In [112]:
cn_params[0]

Parameter containing:
tensor([[[[-0.2930, -0.1676,  0.2038],
          [ 0.0782,  0.1913, -0.1186],
          [-0.2752,  0.1125,  0.0702]]],


        [[[ 0.0406,  0.0976,  0.2261],
          [ 0.1427, -0.0462, -0.2141],
          [-0.0154, -0.3233,  0.1652]]],


        [[[-0.1747,  0.0425, -0.1593],
          [ 0.1950,  0.0265, -0.1779],
          [ 0.0491,  0.1404, -0.1922]]],


        [[[ 0.0414, -0.1219, -0.1255],
          [ 0.0629, -0.3287, -0.2671],
          [ 0.2675,  0.2972, -0.2151]]],


        [[[ 0.0926,  0.3157, -0.1293],
          [-0.0129,  0.1018,  0.1835],
          [-0.0410, -0.3076,  0.3066]]],


        [[[ 0.1276,  0.3241,  0.0871],
          [ 0.0999, -0.0062,  0.0729],
          [ 0.0083, -0.1388, -0.0614]]]], requires_grad=True)

In [110]:
sgd = optim.SGD(cn.parameters(), lr=0.01)

# update weights
cn.zero_grad()
pred = cn(data)
loss = mse(pred, target)
loss.backward()
sgd.step()

In [111]:
cn_params[0]

Parameter containing:
tensor([[[[-0.2930, -0.1676,  0.2038],
          [ 0.0782,  0.1913, -0.1186],
          [-0.2752,  0.1125,  0.0702]]],


        [[[ 0.0406,  0.0976,  0.2261],
          [ 0.1427, -0.0462, -0.2141],
          [-0.0154, -0.3233,  0.1652]]],


        [[[-0.1747,  0.0425, -0.1593],
          [ 0.1950,  0.0265, -0.1779],
          [ 0.0491,  0.1404, -0.1922]]],


        [[[ 0.0414, -0.1219, -0.1255],
          [ 0.0629, -0.3287, -0.2671],
          [ 0.2675,  0.2972, -0.2151]]],


        [[[ 0.0926,  0.3157, -0.1293],
          [-0.0129,  0.1018,  0.1835],
          [-0.0410, -0.3076,  0.3066]]],


        [[[ 0.1276,  0.3241,  0.0871],
          [ 0.0999, -0.0062,  0.0729],
          [ 0.0083, -0.1388, -0.0614]]]], requires_grad=True)