Notes based on the official PyTorch tutorial "Deep Learning with PyTorch: A 60 Minute Blitz": https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html

## Part 1
**Getting familiar with tensors**


In [2]:
from __future__ import print_function
import torch

In [10]:
# torch.empty allocates a 5x3 tensor WITHOUT initializing it: the printed
# values are whatever happened to be in that memory, so they change between runs.
x = torch.empty(5, 3)
print(x)

tensor([[9.1837e-39, 9.3674e-39, 1.0745e-38],
        [1.0653e-38, 9.5510e-39, 1.0561e-38],
        [1.0194e-38, 1.1112e-38, 1.0561e-38],
        [9.9184e-39, 1.0653e-38, 4.1327e-39],
        [1.0194e-38, 1.0469e-38, 8.9082e-39]])


In [9]:
x = torch.rand(5, 3)
print(x)

tensor([[0.8998, 0.1642, 0.3133],
        [0.3504, 0.5004, 0.1446],
        [0.2468, 0.9746, 0.8733],
        [0.4739, 0.1174, 0.5373],
        [0.2111, 0.7714, 0.0849]])


In [4]:
x = torch.zeros(5, 3)
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [12]:
x = torch.tensor([[[5.5, 3, 4],[1,3,5]],[[-1, -3, 0],[-2,-9,-8]]])
print(x)

tensor([[[ 5.5000,  3.0000,  4.0000],
         [ 1.0000,  3.0000,  5.0000]],

        [[-1.0000, -3.0000,  0.0000],
         [-2.0000, -9.0000, -8.0000]]])


In [29]:
# squeeze() only drops dimensions of size 1. x was built with shape (2, 2, 3),
# which has none, so the result shown below is identical to x.
x.squeeze()

tensor([[[ 5.5000,  3.0000,  4.0000],
         [ 1.0000,  3.0000,  5.0000]],

        [[-1.0000, -3.0000,  0.0000],
         [-2.0000, -9.0000, -8.0000]]])

In [6]:
x = x.new_ones(5, 3, dtype=torch.double)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)


In [7]:
x = torch.randn_like(x, dtype=torch.float)
print(x)

tensor([[-0.9732, -0.2105,  0.6769],
        [-0.1850, -0.3077, -0.3135],
        [ 0.6904,  1.8216,  0.0082],
        [-0.6471, -3.0217,  0.3873],
        [-1.1779, -0.5295,  0.7934]])


In [8]:
print(x.size())

torch.Size([5, 3])


In [9]:
y = torch.rand(5, 3)
print(x + y)

tensor([[-0.6642,  0.2742,  1.4818],
        [ 0.1966,  0.3502,  0.0198],
        [ 1.6471,  2.5992,  0.5928],
        [-0.5704, -3.0181,  1.3544],
        [-0.2366,  0.4273,  0.8540]])


In [10]:
print(torch.add(x, y))

tensor([[-0.6642,  0.2742,  1.4818],
        [ 0.1966,  0.3502,  0.0198],
        [ 1.6471,  2.5992,  0.5928],
        [-0.5704, -3.0181,  1.3544],
        [-0.2366,  0.4273,  0.8540]])


In [11]:
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)

tensor([[-0.6642,  0.2742,  1.4818],
        [ 0.1966,  0.3502,  0.0198],
        [ 1.6471,  2.5992,  0.5928],
        [-0.5704, -3.0181,  1.3544],
        [-0.2366,  0.4273,  0.8540]])


In [12]:
y.add_(x)
print(y)

tensor([[-0.6642,  0.2742,  1.4818],
        [ 0.1966,  0.3502,  0.0198],
        [ 1.6471,  2.5992,  0.5928],
        [-0.5704, -3.0181,  1.3544],
        [-0.2366,  0.4273,  0.8540]])


In [13]:
print(x[:,1])

tensor([-0.2105, -0.3077,  1.8216, -3.0217, -0.5295])


In [14]:
# view() reinterprets the same 16 elements under a new shape.
x = torch.randn(4, 4)
y = x.view(16)     # all 16 elements in a single dimension
z = x.view(-1, 8)  # -1 lets torch infer that dimension from the others (16 / 8 = 2)
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [15]:
x = torch.randn(1, 1)
print(x)
print(x.item())

tensor([[0.0975]])
0.09750141948461533


In [16]:
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


In [17]:
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [18]:
# b was obtained from a.numpy(); the in-place add on the tensor a is visible in
# the array b as well (see the output below), i.e. both share the same data.
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [19]:
import numpy as np
# Same sharing in the other direction: NumPy array -> tensor.
a = np.ones(5)
b = torch.from_numpy(a)  # tensor over a's data; keeps float64, as the printed dtype shows
np.add(a, 1, out=a)      # in-place update of the NumPy array...
print(a)
print(b)                 # ...is visible through the tensor too

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


## Part 2
**Getting familiar with autograd**


In [24]:
import torch

In [27]:
x = torch.ones(2, 2, requires_grad = True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [30]:
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [32]:
print(y.grad_fn)

<AddBackward0 object at 0x0000000005424748>


In [33]:
z = y * y * 3
out = z.mean()

print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [35]:
# requires_grad defaults to False and can be switched on afterwards.
a = torch.randn(2, 2)
a = (a * 3) / (a - 1)
print(a.requires_grad)   # False: nothing asked for gradient tracking yet
a.requires_grad_(True)   # trailing underscore: changes the flag on a in place
print(a.requires_grad)
b = (a*a).sum()
print(b.grad_fn)         # b now records the operation that produced it

False
True
<SumBackward0 object at 0x00000000054290C8>


In [37]:
# backward() frees the graph's intermediate buffers, so calling it a second
# time on the same `out` (e.g. by re-running this cell) raises
# "Trying to backward through the graph a second time".
# Rebuild the forward pass from x and drop any previously accumulated gradient,
# so the cell can be executed repeatedly and x.grad is always d(out)/dx.
x.grad = None
out = ((x + 2) * (x + 2) * 3).mean()
out.backward()

In [38]:
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [39]:
x = torch.randn(3, requires_grad = True)
y = x * 2
# Keep doubling y until its L2 norm reaches 1000. The norm is only a stopping
# test and must not become part of the graph, so it is taken on a detached
# tensor; detach() is the supported replacement for the legacy `.data`.
while y.detach().norm() < 1000:
    y = y * 2

print(y)

tensor([  541.1512, -1594.1580,   710.0298], grad_fn=<MulBackward0>)


In [42]:
# y is a vector, not a scalar, so backward() is given a vector v of the same
# shape; x.grad then holds the vector-Jacobian product of v with dy/dx.
# y is x multiplied by a power of two, so each entry of x.grad is the matching
# entry of v times that factor (2048 in the output below).
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)

print(x.grad)

tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])


In [43]:
print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


In [46]:
print(x.requires_grad)
y = x.detach()
print(y.requires_grad)
print(x.eq(y))
print(x.eq(y).all())

True
False
tensor([True, True, True])
tensor(True)


## Part 3
**Getting familiar with neural networks**


In [72]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network: two conv + max-pool stages, then three linear layers.

    Expects single-channel images and returns 10 raw scores per image.
    fc1 is sized for 32x32 inputs (see the shape trace in __init__).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Feature extractor: 1 -> 6 -> 16 channels, both with 3x3 square kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # Shape trace for a 32x32 input: conv1 -> 30x30, pool -> 15x15,
        # conv2 -> 13x13, pool -> 6x6 (13 / 2 rounded down), hence 16 * 6 * 6.
        # (A 4x4 kernel in conv2 would give 12x12 -> 6x6 with no rounding.)
        # Classifier head; every Linear is the affine map y = Wx + b.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch through both conv stages and the classifier head."""
        # Stage 1: conv -> ReLU -> max pool over a (2, 2) window.
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        # Stage 2: same pattern; a square window may be given as a single number.
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        # Flatten everything except the batch dimension for the linear layers.
        flat_width = self.num_flat_features(x)
        x = x.view(-1, flat_width)
        # Two hidden layers with ReLU, then the output layer without activation.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of x except the first (batch) one."""
        total = 1
        for axis in range(1, x.dim()):
            total *= x.size(axis)
        return total
    
net = Net()
print(net)
        

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [73]:
params = list(net.parameters())
print(len(params))
print(params[0].size())

10
torch.Size([6, 1, 3, 3])


In [74]:
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.0842, -0.0056, -0.1156,  0.0546, -0.0594,  0.0797,  0.0703,  0.0099,
          0.1196, -0.0851]], grad_fn=<AddmmBackward>)


In [75]:
net.zero_grad()
out.backward(torch.randn(1, 10))

**Loss functions**

In [76]:
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(0.6766, grad_fn=<MseLossBackward>)


In [77]:
# Walk backwards through the autograd graph, starting at the loss.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
# NOTE: this does not reach the ReLU node. As the output below shows, following
# next_functions[0][0] from the Linear (Addmm) node lands on an AccumulateGrad,
# i.e. the gradient accumulator of a leaf tensor -- presumably fc3's bias; verify.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # AccumulateGrad (not ReLU)

<MseLossBackward object at 0x00000000054D9C48>
<AddmmBackward object at 0x0000000008E9B408>
<AccumulateGrad object at 0x0000000008E87E88>


In [78]:
net.zero_grad()     # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0084, -0.0074,  0.0142, -0.0081, -0.0091,  0.0038])


In [79]:
# manual SGD: weight = weight - learning_rate * gradient
learning_rate = 0.01
# The update itself must not be recorded by autograd, so it runs under
# no_grad() rather than going through the legacy `.data` attribute.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:
            # this parameter received no gradient in the last backward pass
            continue
        f.sub_(f.grad * learning_rate)
    

In [80]:
import torch.optim as optim

# One optimization step with torch.optim instead of the hand-written update.
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop:
# (only a single iteration is executed here; a real loop repeats these five lines)
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)     # NOTE: `input` is the tensor from an earlier cell and shadows the Python built-in
loss = criterion(output, target)
loss.backward()
optimizer.step()    # Does the update