In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# PyTorch: Tensors and Autograd

In [2]:
# Build a 2x3 float tensor from a nested Python list (one inner list per row).
m_data = [
    [1, 2, 3],
    [6, 5, 4],
]
m = torch.tensor(m_data, dtype=torch.float32)
m

tensor([[1., 2., 3.],
        [6., 5., 4.]])

In [3]:
# Draw a rank-3 tensor of shape (2, 3, 4) from torch.randn.
x = torch.randn(2, 3, 4)
x

tensor([[[ 0.8621,  1.3001,  1.6746, -0.9454],
         [-1.2412,  2.0782, -0.6528, -0.5414],
         [ 0.0771, -0.9474,  2.1355,  0.0284]],

        [[-0.6245,  0.9582,  1.1625,  0.3573],
         [-0.2623,  1.5938,  0.4975,  0.3255],
         [-0.9988, -0.2040,  1.6705,  0.4629]]])

In [4]:
# Two matrices with the same number of columns (4) but different row counts.
x1 = torch.randn(2, 4)
y1 = torch.randn(3, 4)
print(x1, y1, sep="\n")
# torch.cat works like np.concatenate: dim=0 (also the default) stacks the
# rows of y1 underneath the rows of x1, giving a 5x4 result.
z1 = torch.cat((x1, y1), dim=0)
print(z1)

tensor([[ 0.4317, -0.1789, -1.4176,  2.7956],
        [ 2.4455,  0.5367, -1.8635, -0.0413]])
tensor([[-0.3488, -0.3780, -0.3134, -2.0054],
        [ 0.2738, -0.8761,  2.0643, -1.2928],
        [-0.1407,  0.6571,  0.0211,  0.5552]])
tensor([[ 0.4317, -0.1789, -1.4176,  2.7956],
        [ 2.4455,  0.5367, -1.8635, -0.0413],
        [-0.3488, -0.3780, -0.3134, -2.0054],
        [ 0.2738, -0.8761,  2.0643, -1.2928],
        [-0.1407,  0.6571,  0.0211,  0.5552]])


In [5]:
x = torch.randn(2, 3, 4)
print(x)
# view() plays the role of np.reshape: the 3x4 trailing block of each batch
# entry is flattened into a row of 12. Passing -1 asks PyTorch to work out
# that size itself, so both calls print the same 2x12 tensor.
print(x.view(2, 12), x.view(2, -1), sep="\n")

tensor([[[-0.4134, -2.3437,  0.3276,  0.4520],
         [ 2.2598, -1.0423, -0.4690, -0.3379],
         [-0.0252,  0.0714, -1.1776,  0.7061]],

        [[ 0.7615, -1.3326,  2.4765,  1.3274],
         [-0.0181, -2.7809, -0.6896, -0.7834],
         [ 0.2362,  1.2167,  1.4762,  1.2964]]])
tensor([[-0.4134, -2.3437,  0.3276,  0.4520,  2.2598, -1.0423, -0.4690, -0.3379,
         -0.0252,  0.0714, -1.1776,  0.7061],
        [ 0.7615, -1.3326,  2.4765,  1.3274, -0.0181, -2.7809, -0.6896, -0.7834,
          0.2362,  1.2167,  1.4762,  1.2964]])
tensor([[-0.4134, -2.3437,  0.3276,  0.4520,  2.2598, -1.0423, -0.4690, -0.3379,
         -0.0252,  0.0714, -1.1776,  0.7061],
        [ 0.7615, -1.3326,  2.4765,  1.3274, -0.0181, -2.7809, -0.6896, -0.7834,
          0.2362,  1.2167,  1.4762,  1.2964]])


In [6]:
# By default a tensor is created with requires_grad=False, so no computation
# history is tracked for it.
x = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32)
print(x)

y = torch.tensor([4.0, 5.0, 6.0], dtype=torch.float32)
z = x + y
# Nothing was recorded for the addition, hence grad_fn is None.
print(z, z.grad_fn, sep="\n")

tensor([1., 2., 3.])
tensor([5., 7., 9.])
None


In [7]:
# Creating the tensors with requires_grad=True switches on computation
# history tracking for everything derived from them.
x = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32, requires_grad=True)
print(x)

y = torch.tensor([4.0, 5.0, 6.0], dtype=torch.float32, requires_grad=True)
z = x + y
# z now carries a grad_fn that refers back to the addition which produced it.
print(z, z.grad_fn, sep="\n")

tensor([1., 2., 3.], requires_grad=True)
tensor([5., 7., 9.], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x0000023E3F3E0FC8>


In [8]:
# Reduce z to a scalar so that backward() can be called without arguments.
s = z.sum()
print(s)
print(s.grad_fn)

# backward() accumulates into .grad instead of overwriting it. Clear any
# gradients left behind by an earlier execution of this cell so that re-running
# it still prints tensor([1., 1., 1.]) rather than [2., 2., 2.], [3., 3., 3.], ...
x.grad = None
y.grad = None
s.backward()
# s = sum(x + y), so ds/dx_i = 1 for every element of x.
print(x.grad)

tensor(21., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x0000023E3F3E6C08>
tensor([1., 1., 1.])


In [9]:
# Detach z from computation tracking: the tensor returned by detach() has no
# grad_fn (the print below shows None).
new_z = z.detach()
print(new_z.grad_fn)

None


# PyTorch: Layers and Activation Functions

In [10]:
# Linear (affine) layer taking 5 input features to 3 output features.
# It owns two learnable parameters: the weight W (3x5) and the bias b (3,).
lin = nn.Linear(in_features=5, out_features=3)
[*lin.parameters()]

[Parameter containing:
 tensor([[-0.0579,  0.3437,  0.3479,  0.3967, -0.3022],
         [-0.4053, -0.0244, -0.1431,  0.3994, -0.4026],
         [-0.0929,  0.3741, -0.2829,  0.1329, -0.3502]], requires_grad=True),
 Parameter containing:
 tensor([0.1193, 0.1004, 0.3738], requires_grad=True)]

In [11]:
x = torch.randn(2, 5)
print(x)

# Forward pass through the layer ...
print(lin(x))
# ... and the same affine map written out by hand: x @ W^T + b.
print(torch.mm(x, lin.weight.t()) + lin.bias)

tensor([[ 0.3871,  1.1387,  0.6009, -0.4370, -1.4277],
        [-1.0157,  1.2764,  0.7053, -1.0976, -0.6602]])
tensor([[0.9554, 0.2300, 1.0358],
        [0.6262, 0.2075, 0.8315]], grad_fn=<AddmmBackward>)
tensor([[0.9554, 0.2300, 1.0358],
        [0.6262, 0.2075, 0.8315]], grad_fn=<AddBackward0>)


In [12]:
# tanh and ReLU are the non-linearities most people reach for by default.
x = torch.randn(2, 2)
# ReLU replaces negative entries with 0 and leaves positive entries untouched.
print(x, F.relu(x), sep="\n")

tensor([[-1.9328,  1.3682],
        [-1.8452,  1.3094]])
tensor([[0.0000, 1.3682],
        [0.0000, 1.3094]])


In [13]:
# Softmax & Probability
x = torch.randn(2, 5)
print(x)

# dim=-1 -> normalise over the innermost axis, so every row of x is turned
# into a probability distribution.
probs = F.softmax(x, dim=-1)
print(probs)
# Sanity check: sum over the same axis that was normalised. Using dim=-1 here
# (rather than a hard-coded dim=1) keeps the check valid if x is given extra
# leading dimensions.
print(probs.sum(dim=-1))
# log_softmax returns the log of those probabilities in a single call.
print(F.log_softmax(x, dim=-1))

tensor([[ 0.1544,  1.5411,  0.2937,  1.4034, -0.0031],
        [ 1.3801, -1.1911, -1.6563,  0.0643, -0.9061]])
tensor([[0.0953, 0.3814, 0.1096, 0.3323, 0.0814],
        [0.6692, 0.0512, 0.0321, 0.1795, 0.0680]])
tensor([1.0000, 1.0000])
tensor([[-2.3507, -0.9639, -2.2114, -1.1016, -2.5082],
        [-0.4017, -2.9729, -3.4382, -1.7175, -2.6879]])
