In [1]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Single seed shared by every random number generator used in this notebook.
SEED = 515

# Seed the Python, NumPy and PyTorch generators so random draws are repeatable.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Request deterministic cuDNN kernels, and explicitly keep the cuDNN auto-tuner
# off so the algorithm it selects cannot vary between runs.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# PyTorch: Tensors and Autograd

In [2]:
# Build a 2x3 float tensor from a nested Python list (one inner list per row).
m_data = [
    [1, 2, 3],
    [6, 5, 4],
]
m = torch.tensor(m_data, dtype=torch.float32)
m

tensor([[1., 2., 3.],
        [6., 5., 4.]])

In [3]:
# Draw a rank-3 tensor of shape (2, 3, 4) from the standard normal distribution.
x = torch.randn(2, 3, 4)
x

tensor([[[-0.3902,  1.4256,  0.0491, -0.0559],
         [-1.0172,  0.6198,  0.3173, -1.8878],
         [-0.3775,  1.6218, -0.5878,  0.0452]],

        [[-0.5670,  1.6442,  1.6313, -1.2841],
         [ 0.0240,  0.4192,  1.3738,  0.6919],
         [-0.7332, -0.2310,  0.1162, -0.6269]]])

In [4]:
# Two random matrices with the same number of columns (4) but 2 and 3 rows.
x1 = torch.randn(2, 4)
y1 = torch.randn(3, 4)
print(x1)
print(y1)
# torch.cat (cf. np.concatenate) joins along the first axis by default;
# dim=0 is written out here to make the stacking direction explicit.
z1 = torch.cat((x1, y1), dim=0)
print(z1)

tensor([[-1.1112, -0.3307,  1.0290, -0.5752],
        [ 0.2071, -0.6596,  1.4697,  0.1795]])
tensor([[ 0.3530, -0.2241, -0.1479,  0.6436],
        [ 1.1626, -0.2370,  0.1363,  0.1794],
        [ 0.9533, -1.3683, -1.6694,  0.3587]])
tensor([[-1.1112, -0.3307,  1.0290, -0.5752],
        [ 0.2071, -0.6596,  1.4697,  0.1795],
        [ 0.3530, -0.2241, -0.1479,  0.6436],
        [ 1.1626, -0.2370,  0.1363,  0.1794],
        [ 0.9533, -1.3683, -1.6694,  0.3587]])


In [5]:
# Start from a random tensor of shape (2, 3, 4).
x = torch.randn((2, 3, 4))
print(x)
# view() is used like np.reshape: the last two axes (3 * 4 = 12 values) are
# flattened into one, first with the target size spelled out ...
print(x.view(2, 3 * 4))
# ... and then with -1, which lets PyTorch infer that size.
print(x.view(x.size(0), -1))

tensor([[[-8.4383e-01, -1.4043e+00, -1.6591e-03, -4.5854e-01],
         [ 4.7313e-03,  4.8140e-01,  3.2641e+00, -2.0551e+00],
         [ 8.1119e-01, -1.0239e+00,  1.2149e-01, -1.3790e-01]],

        [[-1.3063e+00,  1.4654e-01, -7.1071e-02,  3.5700e-01],
         [-2.8966e-01, -6.3497e-01, -1.2692e+00, -1.5048e+00],
         [-1.4057e+00, -8.9566e-01, -8.9407e-01, -3.6626e-01]]])
tensor([[-8.4383e-01, -1.4043e+00, -1.6591e-03, -4.5854e-01,  4.7313e-03,
          4.8140e-01,  3.2641e+00, -2.0551e+00,  8.1119e-01, -1.0239e+00,
          1.2149e-01, -1.3790e-01],
        [-1.3063e+00,  1.4654e-01, -7.1071e-02,  3.5700e-01, -2.8966e-01,
         -6.3497e-01, -1.2692e+00, -1.5048e+00, -1.4057e+00, -8.9566e-01,
         -8.9407e-01, -3.6626e-01]])
tensor([[-8.4383e-01, -1.4043e+00, -1.6591e-03, -4.5854e-01,  4.7313e-03,
          4.8140e-01,  3.2641e+00, -2.0551e+00,  8.1119e-01, -1.0239e+00,
          1.2149e-01, -1.3790e-01],
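        [-1.3063e+00,  1.4654e-01, -7.1071e-02,  3.5700e-01, -2.8966e-01,
         -6.3497e-01, -1.2692e+00, -1.5048e+00, -1.4057e+00, -8.9566e-01,
         -8.9407e-01, -3.6626e-01]])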

In [6]:
# Tensors are created with requires_grad=False unless told otherwise, so no
# computation history is recorded for them.
x = torch.tensor([1, 2, 3], dtype=torch.float)
print(x)

y = torch.tensor([4, 5, 6], dtype=torch.float)
z = torch.add(x, y)
print(z)
# Nothing was tracked, hence z has no backward function attached.
print(z.grad_fn)

tensor([1., 2., 3.])
tensor([5., 7., 9.])
None


In [7]:
# With requires_grad=True, operations on the tensors are recorded.
x = torch.tensor([1, 2, 3], dtype=torch.float, requires_grad=True)
print(x)

y = torch.tensor([4, 5, 6], dtype=torch.float, requires_grad=True)
z = torch.add(x, y)
print(z)
# z now carries the backward function of the addition that produced it.
print(z.grad_fn)

tensor([1., 2., 3.], requires_grad=True)
tensor([5., 7., 9.], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7f7349936fd0>


In [8]:
# Reduce z to a scalar; the reduction is itself recorded as a backward function.
s = torch.sum(z)
print(s)
print(s.grad_fn)

# Back-propagate from s: since s = sum(x + y), ds/dx is 1 for every element.
s.backward()
print(x.grad)

tensor(21., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x7f73494f1d60>
tensor([1., 1., 1.])


In [9]:
# Detach z from computation tracking: the tensor returned by detach() has no
# grad_fn, so the print below shows None.
new_z = z.detach()
print(new_z.grad_fn)

None


# PyTorch: Layers and Activation Functions

In [10]:
# A linear (affine) layer taking 5 input features to 3 output features.
# Its learnable parameters are the weight matrix W and the bias vector b.
lin = nn.Linear(in_features=5, out_features=3)
[*lin.parameters()]

[Parameter containing:
 tensor([[ 0.2485,  0.2113, -0.1850,  0.0898, -0.1738],
         [-0.4004,  0.0753, -0.1953, -0.1229, -0.4364],
         [ 0.3894,  0.1676,  0.2391,  0.3415,  0.0402]], requires_grad=True),
 Parameter containing:
 tensor([0.3819, 0.1956, 0.2790], requires_grad=True)]

In [11]:
# A batch of two 5-dimensional inputs.
x = torch.randn((2, 5))
print(x)

# Calling the layer ...
print(lin(x))
# ... gives the same values as the affine map x @ W^T + b computed by hand.
print(x @ lin.weight.t() + lin.bias)

tensor([[-1.7785, -1.7638, -1.0277, -1.1721,  1.0654],
        [-0.1361, -0.4240,  0.5884, -1.1480, -1.3654]])
tensor([[-0.5331,  0.6546, -1.3124],
        [ 0.2838,  0.8401, -0.1514]], grad_fn=<AddmmBackward>)
tensor([[-0.5331,  0.6546, -1.3124],
        [ 0.2838,  0.8401, -0.1514]], grad_fn=<AddBackward0>)


In [12]:
# tanh and ReLU are the usual default choices of non-linearity.
# ReLU zeroes the negative entries and passes the positive ones through.
x = torch.randn((2, 2))
print(x)
print(F.relu(x))

tensor([[-0.2580, -0.3184],
        [-0.3047,  0.3732]])
tensor([[0.0000, 0.0000],
        [0.0000, 0.3732]])


In [13]:
# Softmax & probability
x = torch.randn(2, 5)
print(x)

# dim=-1 -> normalize over the innermost axis, i.e. across the 5 scores of each row.
# The result is computed once and reused for both the display and the sum check.
probs = F.softmax(x, dim=-1)
print(probs)
# Each row is a probability distribution, so it sums to 1. The sum runs over
# the same dim=-1 as the softmax, so the check does not rely on x being 2-D.
print(probs.sum(dim=-1))
# log_softmax returns the logarithm of the softmax probabilities.
print(F.log_softmax(x, dim=-1))

tensor([[ 0.7600,  0.0070, -2.0061,  0.3675, -0.5086],
        [-1.1502,  0.3516, -1.2420,  2.0638,  0.0822]])
tensor([[0.4015, 0.1891, 0.0253, 0.2712, 0.1129],
        [0.0288, 0.1293, 0.0263, 0.7168, 0.0988]])
tensor([1.0000, 1.0000])
tensor([[-0.9125, -1.6654, -3.6786, -1.3050, -2.1810],
        [-3.5471, -2.0453, -3.6388, -0.3330, -2.3147]])
