In [1]:
# Author: Robert Guthrie

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed torch's global random number generator so the torch.randn calls in the
# cells below can be reproduced on a fresh run.
# Because this call is the cell's last expression, its return value
# (a torch Generator object) is echoed as the cell output.
torch.manual_seed(1)

<torch._C.Generator at 0x1a127680750>

In [2]:
# torch.tensor(data) builds a torch.Tensor from (nested) Python lists.
# The nesting depth of the list determines the rank of the tensor.

# Rank 1: a vector with three entries.
V_data = [1., 2., 3.]
V = torch.tensor(V_data)

# Rank 2: a matrix with two rows and three columns.
M_data = [
    [1., 2., 3.],
    [4., 5., 6],
]
M = torch.tensor(M_data)

# Rank 3: a tensor of size 2x2x2 (two stacked 2x2 matrices).
T_data = [
    [[1., 2.], [3., 4.]],
    [[5., 6.], [7., 8.]],
]
T = torch.tensor(T_data)

# Show the vector, the matrix and the 3D tensor, one after another.
print(V, M, T, sep="\n")

tensor([1., 2., 3.])
tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]])


In [3]:
# Indexing with a single integer drops the leading dimension, so every
# result below has one rank less than the tensor it was taken from.

# Vector -> scalar (a 0-dimensional tensor) ...
first_entry = V[0]
print(first_entry)
# ... and .item() unwraps that scalar into a plain Python number.
print(first_entry.item())

# Matrix -> vector (the first row of M).
print(M[0])

# 3D tensor -> matrix (the first 2x2 slice of T).
print(T[0])

tensor(1.)
1.0
tensor([1., 2., 3.])
tensor([[1., 2.],
        [3., 4.]])


In [4]:
# Draw a 3x4x5 tensor; the size is passed as separate ints rather than a tuple.
x = torch.randn(3, 4, 5)
print(x)

tensor([[[-1.5256, -0.7502, -0.6540, -1.6095, -0.1002],
         [-0.6092, -0.9798, -1.6091, -0.7121,  0.3037],
         [-0.7773, -0.2515, -0.2223,  1.6871,  0.2284],
         [ 0.4676, -0.6970, -1.1608,  0.6995,  0.1991]],

        [[ 0.8657,  0.2444, -0.6629,  0.8073,  1.1017],
         [-0.1759, -2.2456, -1.4465,  0.0612, -0.6177],
         [-0.7981, -0.1316,  1.8793, -0.0721,  0.1578],
         [-0.7735,  0.1991,  0.0457,  0.1530, -0.4757]],

        [[-0.1110,  0.2927, -0.1578, -0.0288,  0.4533],
         [ 1.1422,  0.2486, -1.7754, -0.0255, -1.0233],
         [-0.5962, -1.0055,  0.4285,  1.4761, -1.7869],
         [ 1.6103, -0.7040, -0.1853, -0.9962, -0.8313]]])


In [5]:
# Tensors of the same shape are added entry by entry.
x = torch.tensor([1., 2., 3.])
y = torch.tensor([4., 5., 6.])
z = torch.add(x, y)  # functional spelling of x + y
print(z)

tensor([5., 7., 9.])


In [8]:
# torch.cat joins tensors along an existing axis; axis 0 (rows) is the default.
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat((x_1, y_1), dim=0)  # 2 rows + 3 rows -> 5 rows of 5 columns
print(z_1)

# Joining along the columns instead: the second argument picks the axis.
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
z_2 = torch.cat((x_2, y_2), dim=1)  # 3 columns + 5 columns -> 2 rows of 8 columns
print(z_2)

# If the tensors are not compatible, torch will complain.
# Uncomment the next line to see the error:
# torch.cat([x_1, x_2])

tensor([[-0.3339, -1.4724,  0.7296, -0.1312, -0.6368],
        [ 1.0429,  0.4903,  1.0318, -0.5989,  1.6015],
        [-1.0735, -1.2173,  0.6472, -0.0412, -0.1775],
        [-0.5000,  0.8673, -0.2732, -0.4608, -0.0991],
        [ 0.4728,  1.0049, -0.2871, -1.1619,  0.0276]])
tensor([[ 0.5652, -0.0115,  0.6706,  1.6169, -0.9026,  0.1737,  0.0772, -0.9339],
        [-0.4929,  1.5050, -2.3264,  0.0914,  1.3940, -0.6877, -0.5058,  0.1924]])


In [9]:
# Start from a 2x3x4 tensor and look at it under a different shape.
x = torch.randn(2, 3, 4)
print(x)

# Flatten the trailing 3x4 block of each leading entry: 2 rows, 12 columns.
print(x.view((2, 12)))

# Same result: a dimension given as -1 is inferred from the remaining sizes.
print(x.view((2, -1)))

tensor([[[ 1.4023,  0.4841, -0.7030, -0.8268],
         [ 0.1012,  0.1952, -1.1796, -1.4423],
         [ 0.2936, -0.4139, -0.0960, -1.3281]],

        [[ 0.2324,  0.8615,  0.6218, -1.7812],
         [-0.0339, -0.5396, -1.8074,  0.3425],
         [ 0.8532,  0.0551, -1.7425,  0.8750]]])
tensor([[ 1.4023,  0.4841, -0.7030, -0.8268,  0.1012,  0.1952, -1.1796, -1.4423,
          0.2936, -0.4139, -0.0960, -1.3281],
        [ 0.2324,  0.8615,  0.6218, -1.7812, -0.0339, -0.5396, -1.8074,  0.3425,
          0.8532,  0.0551, -1.7425,  0.8750]])
tensor([[ 1.4023,  0.4841, -0.7030, -0.8268,  0.1012,  0.1952, -1.1796, -1.4423,
          0.2936, -0.4139, -0.0960, -1.3281],
        [ 0.2324,  0.8615,  0.6218, -1.7812, -0.0339, -0.5396, -1.8074,  0.3425,
          0.8532,  0.0551, -1.7425,  0.8750]])


In [10]:
# Tensor factory methods accept a ``requires_grad`` flag; both inputs below
# are created with it switched on.
x = torch.tensor([1., 2., 3], requires_grad=True)
y = torch.tensor([4., 5., 6], requires_grad=True)

# Every operation that worked before still works with requires_grad=True ...
z = x + y
print(z)

# ... BUT the result carries something extra: a reference to the function
# that produced it.
print(z.grad_fn)

tensor([5., 7., 9.], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x000001A128586390>


In [11]:
# Reduce z to a single scalar by adding up all of its entries.
s = torch.sum(z)
print(s)
# The scalar also records the operation that created it.
print(s.grad_fn)

tensor(21., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x000001A12856BDD8>


In [12]:
# calling .backward() on any variable will run backprop, starting from it.
# Here backprop starts at the scalar s; afterwards x.grad holds ds/dx
# (the output below shows a gradient of 1 for every entry of x).
# NOTE(review): this cell is probably not idempotent -- a second run would call
# backward() on the same graph again and presumably accumulate into x.grad
# instead of resetting it; confirm before re-executing it on its own.
s.backward()
print(x.grad)

tensor([1., 1., 1.])


In [13]:
# Tensors created by the user start out with ``requires_grad=False``.
x = torch.randn(2, 2)
y = torch.randn(2, 2)
print(x.requires_grad, y.requires_grad)

# No gradient tracking on the inputs means no history on the result,
# so there is nothing to backprop through.
z = x + y
print(z.grad_fn)

# ``.requires_grad_( ... )`` flips the flag of an existing Tensor in-place
# (the argument defaults to ``True``; it is spelled out here).
x.requires_grad_(True)
y.requires_grad_(True)

# Recomputed from tracking inputs, z now holds what is needed for gradients.
z = x + y
print(z.grad_fn)
# An output requires grad as soon as any input of the operation does.
print(z.requires_grad)

# z now carries the computation history linking it to x and y.
# Can we keep just its values and **detach** it from that history?
new_z = z.detach()

# ... can new_z still backprop to x and y?  NO!
print(new_z.grad_fn)
# ``z.detach()`` returns a tensor sharing the same storage as ``z`` but with
# the computation history forgotten, so it knows nothing about how it was
# computed. In essence, the Tensor has been cut off from its past.

False False
None
<AddBackward0 object at 0x000001A128586E80>
True
None


In [14]:
# x tracks gradients, and so does anything computed from it ...
print(x.requires_grad)
squared = x ** 2
print(squared.requires_grad)

# ... unless the computation happens inside a torch.no_grad() block.
with torch.no_grad():
    squared_untracked = x ** 2
    print(squared_untracked.requires_grad)

True
True
False


In [16]:
# nn.Linear(5, 3) is an affine map from R^5 to R^3 with parameters A, b.
lin = nn.Linear(5, 3)
# data is 10x2x5, not a single 2x5 matrix.  A maps from 5 to 3 along the last
# dimension and the leading dimensions are carried through unchanged, so the
# result printed below is 10x2x3.  Can we map "data" under A?
data = torch.randn(10, 2, 5)
print(lin(data))  # yes

tensor([[[ 0.7427, -0.4709, -0.6170],
         [-0.9232,  1.7096, -0.1141]],

        [[ 0.1571,  0.8405, -0.3904],
         [ 0.9822, -0.1439, -0.9309]],

        [[-0.5673,  1.3977, -0.3661],
         [ 0.9900,  0.0496, -0.2378]],

        [[-0.9293,  1.1629, -0.2616],
         [-0.9165,  1.1027,  0.1332]],

        [[ 0.5412,  0.8358, -0.4429],
         [ 0.7587,  0.1732, -0.2222]],

        [[ 0.0634, -0.0859, -0.4064],
         [ 0.2016,  0.9723, -0.2891]],

        [[-0.5936,  0.0828, -0.3980],
         [ 0.7123, -0.3259, -0.5176]],

        [[ 0.3149, -0.0067, -0.3985],
         [ 0.0297, -0.0544, -0.2285]],

        [[-0.3191,  0.0892, -0.1028],
         [-0.9154,  0.6945, -0.3821]],

        [[-0.7324,  0.3048, -0.2287],
         [ 0.1844, -0.3945, -0.4170]]], grad_fn=<AddBackward0>)


In [17]:
# ReLU acts element-wise: negative entries become 0, the rest pass through.
data = torch.randn(2, 2)
# Print the raw values first, then the activated ones, for comparison.
print(data, F.relu(data), sep="\n")

tensor([[ 0.2994, -0.9864],
        [-1.8248,  0.5989]])
tensor([[0.2994, 0.0000],
        [0.0000, 0.5989]])


In [18]:
# Softmax lives in torch.nn.functional as well.
data = torch.randn(5)
print(data)

# Turn the five scores into probabilities along dimension 0.
probs = F.softmax(data, dim=0)
print(probs)
print(probs.sum())  # Sums to 1 because it is a distribution!

# There's also log_softmax, which returns the log-probabilities.
print(F.log_softmax(data, dim=0))

tensor([ 1.1414e+00,  3.1890e-02,  3.7484e-01, -1.1240e-03,  9.8855e-01])
tensor([0.3365, 0.1110, 0.1563, 0.1074, 0.2888])
tensor(1.)
tensor([-1.0891, -2.1986, -1.8557, -2.2316, -1.2420])
