In [127]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [8]:
# Uninitialized
torch.empty(5, 3)

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 1.6573e+36]])

In [9]:
# Initialized
torch.rand(5, 3)

tensor([[0.7148, 0.0289, 0.4189],
        [0.3223, 0.6405, 0.8271],
        [0.5467, 0.6468, 0.2061],
        [0.9913, 0.2140, 0.1812],
        [0.0392, 0.9157, 0.1558]])

In [10]:
# With type
torch.zeros(5, 3, dtype=torch.long)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [11]:
# From data
torch.tensor([5, 3])

tensor([5, 3])

In [16]:
# From existing tensor
x = torch.ones(5, 3, dtype=torch.double)
torch.randn_like(x)

tensor([[ 0.5406,  1.5145, -0.3542],
        [ 0.8924,  0.3300, -1.1371],
        [-2.4557, -0.2414, -2.3109],
        [ 1.3068,  1.0592,  0.4216],
        [-0.9412, -1.1299,  1.9326]], dtype=torch.float64)

In [23]:
# Different faces of add
x = torch.rand(5, 3)
y = torch.rand(5, 3)
z = x + y
print(z)
z = torch.add(x, y)
print(z)
print(z)
torch.add(x, y, out=z)
print(z)
z = x
z.add_(y)
print(z)

tensor([[1.2134, 1.2085, 1.0990],
        [1.4780, 1.1750, 1.0276],
        [1.9295, 0.5874, 1.0015],
        [1.5933, 1.5785, 1.4470],
        [1.2730, 0.4871, 1.0462]])
tensor([[1.2134, 1.2085, 1.0990],
        [1.4780, 1.1750, 1.0276],
        [1.9295, 0.5874, 1.0015],
        [1.5933, 1.5785, 1.4470],
        [1.2730, 0.4871, 1.0462]])
tensor([[1.2134, 1.2085, 1.0990],
        [1.4780, 1.1750, 1.0276],
        [1.9295, 0.5874, 1.0015],
        [1.5933, 1.5785, 1.4470],
        [1.2730, 0.4871, 1.0462]])
tensor([[1.2134, 1.2085, 1.0990],
        [1.4780, 1.1750, 1.0276],
        [1.9295, 0.5874, 1.0015],
        [1.5933, 1.5785, 1.4470],
        [1.2730, 0.4871, 1.0462]])
tensor([[1.2134, 1.2085, 1.0990],
        [1.4780, 1.1750, 1.0276],
        [1.9295, 0.5874, 1.0015],
        [1.5933, 1.5785, 1.4470],
        [1.2730, 0.4871, 1.0462]])


In [25]:
# Indexing slicing
x[:, 2]

tensor([1.0990, 1.0276, 1.0015, 1.4470, 1.0462])

In [26]:
# Change shape
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [29]:
# Get scala value
x = torch.tensor(2)
print(x)
x.item()

tensor(2)


2

In [36]:
# Shared memory location with numpy
a = torch.ones(2, 3)
b = a.numpy()
a.add_(1)
print(a)
print(b)

a = np.zeros((2, 3))
b = torch.from_numpy(a)
np.add(a, 5, out=a)
print(a)
print(b)

tensor([[2., 2., 2.],
        [2., 2., 2.]])
[[2. 2. 2.]
 [2. 2. 2.]]
[[5. 5. 5.]
 [5. 5. 5.]]
tensor([[5., 5., 5.],
        [5., 5., 5.]], dtype=torch.float64)


In [43]:
# Open the auto_grad
x = torch.ones(2, 2, requires_grad=True)
print(x)

# When performing calculation, it create a grad_fn
y = x**2
print(y)

# Performing more calculation
z = y * y * 3
out = z.mean()
print(z)
print(out)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[1., 1.],
        [1., 1.]], grad_fn=<PowBackward0>)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<MulBackward0>)
tensor(3., grad_fn=<MeanBackward0>)


In [44]:
# Change required grad in place
a = torch.randn(2, 2)
a = ((a * 3) / (a - 1))
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b)
print(b.grad_fn)

False
True
tensor(207.3658, grad_fn=<SumBackward0>)
<SumBackward0 object at 0x1224fcf60>


In [61]:
# Perform backprop
x = torch.ones(2, 2, requires_grad=True)
y = x**2
z = y * y * 3
out = z.mean()

out.backward(z)
x.grad

tensor([[36., 36.],
        [36., 36.]])

In [63]:
# Example with vector-Jacobian product
x = torch.randn(3, requires_grad=True)
y = x * 2
while y.data.norm() < 1000:
    y = y * 2
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)
print(x.grad)

tensor([8.1920e+02, 8.1920e+03, 8.1920e-01])


In [69]:
# Temporarily turn autograd off
print(x.requires_grad)
print((x**2).requires_grad)
with torch.no_grad():
    print((x**2).requires_grad)

True
True
False


In [86]:
class Net(nn.Module):
    
    def __init__(self):
        # init the origin?
        super(Net, self).__init__()
        # conv layers
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # fc layers
        self.fc1 = nn.Linear(16*6*6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flaten
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [94]:
# Show parameters
params = list(net.parameters())
print(len(params))
# Show first layers
print(params[4].size())

10
torch.Size([120, 576])


In [113]:
# Forward prop
input = torch.randn(1, 1, 32, 32)
# call the forward
out = net(input) 
print(out)

tensor([[-0.1061, -0.0452,  0.0014, -0.0356, -0.0383,  0.0768, -0.0819, -0.0427,
         -0.0440,  0.0351]], grad_fn=<AddmmBackward>)


In [114]:
# Backprop
net.zero_grad()
out.backward(torch.randn(1, 10))

In [115]:
# Backprop with loss function
output = net(input)
target = torch.randn(10)
target = target.view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)
print(loss.grad_fn)
print(loss.grad_fn.next_functions[0][0])
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

net.zero_grad()
print(net.conv1.bias.grad)
loss.backward()
print(net.conv1.bias.grad)

tensor(0.7949, grad_fn=<MseLossBackward>)
<MseLossBackward object at 0x123021390>
<AddmmBackward object at 0x123056a90>
<AccumulateGrad object at 0x123021390>
tensor([0., 0., 0., 0., 0., 0.])
tensor([-0.0022,  0.0034,  0.0003,  0.0023, -0.0023,  0.0045])


In [119]:
# Adjusting weights manually
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [121]:
# Adjusting weights using optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()