# Overview

In this series of tutorials, we will introduce some basics of PyTorch. Note that the API of any library is subject to change, so it is important to check the official documentation before proceeding.

We will cover the following material in this tutorial.

1. Create tensors
2. Some common manipulations (including broadcasting and einsum)

In [3]:
import torch
# Build a 2x2 tensor directly from a nested Python list of floats.
my_tensor = torch.tensor(
    [
        [0.0, 1.0],
        [0.1, 0.2],
    ]
)
my_tensor

tensor([[0.0000, 1.0000],
        [0.1000, 0.2000]])

In [4]:
# Round-trip through an integer dtype: .int() drops the fractional part, and
# converting back with .float() does not restore it (0.1 and 0.2 become 0.).
new_tensor = my_tensor.int().float()
new_tensor

tensor([[0., 1.],
        [0., 0.]])

In [10]:
import numpy as np
np_tensor = np.array([[1.0, 2.0], [0.1, 0.2]])

# torch.tensor keeps the NumPy dtype, so this tensor is float64.
tensor_from_np = torch.tensor(np_tensor)
print(tensor_from_np)

# .float() converts to float32, the dtype PyTorch uses by default.
tensor_from_np = tensor_from_np.float()
print(tensor_from_np)

# Convert back to a NumPy array; the float32 dtype is carried over.
to_numpy = tensor_from_np.numpy()
print(to_numpy)

tensor([[1.0000, 2.0000],
        [0.1000, 0.2000]], dtype=torch.float64)
tensor([[1.0000, 2.0000],
        [0.1000, 0.2000]])
[[1.  2. ]
 [0.1 0.2]]


In [11]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# The result of .to() is only displayed here; my_tensor itself is not reassigned.
# https://pytorch.org/docs/stable/cuda.html
my_tensor.to(device=device)

tensor([[0.0000, 1.0000],
        [0.1000, 0.2000]], device='cuda:0')

In [12]:
# Convert the tensor to a NumPy array.
to_numpy = my_tensor.numpy()
# .item() extracts the value of a one-element tensor as a plain Python number.
single_number = torch.tensor([0])
single_number.item()

0

In [13]:
# requires_grad=True asks autograd to track operations on this tensor.
tensor_with_gradient = torch.tensor(
    [[0.1, 1.0], [1.0, 2.0]],
    requires_grad=True,
)

# result = sum(t ** 2), so d(result)/dt = 2 * t.
squared = tensor_with_gradient.pow(2)
result = squared.sum()

# backward() fills in .grad on the tensor that requires gradients.
result.backward()
tensor_with_gradient.grad

tensor([[0.2000, 2.0000],
        [2.0000, 4.0000]])

In [14]:
# In-place detach (note the trailing underscore): the tensor is removed from
# the autograd graph and modified directly.
tensor_with_gradient.detach_()
# Out-of-place alternative, which returns a new detached tensor instead:
# tensor_with_gradient = tensor_with_gradient.detach()

tensor([[0.1000, 1.0000],
        [1.0000, 2.0000]])

## Basic Operations

In [21]:
# Integer and float literals are mixed here; the resulting tensor is floating point.
x = torch.tensor(
    [
        [1, 2],
        [0.2, 0.1],
    ]
)
x

tensor([[1.0000, 2.0000],
        [0.2000, 0.1000]])

In [16]:
# Adding a scalar adds it to every element of x.
x + 1

tensor([[2.0000, 3.0000],
        [1.2000, 1.1000]])

In [17]:
# Multiplying by a scalar scales every element of x.
x * 2

tensor([[2.0000, 4.0000],
        [0.4000, 0.2000]])

In [18]:
# y has the same 2x2 shape as x, so + adds the tensors element by element.
y = torch.tensor([[3,1], [0.4, 1]])
x + y

tensor([[4.0000, 3.0000],
        [0.6000, 1.1000]])

In [23]:
# Indexing: a bare ":" selects everything along that dimension.
print(x[:,:])  # the whole tensor
print(x[1,:])  # second row
print(x[:,1])  # second column
print(x)
print(x.shape)

# unsqueeze(0) inserts a new size-1 dimension at position 0: (2, 2) -> (1, 2, 2).
xu = x.unsqueeze(0)
print(xu)
print(xu.shape)

# squeeze() removes every dimension of size one. It is applied to xu, which has
# a size-1 dimension (x has none, so squeezing x would change nothing):
# (1, 2, 2) -> (2, 2).
xs = xu.squeeze()
print(xs)
print(xs.shape)

tensor([[1.0000, 2.0000],
        [0.2000, 0.1000]])
tensor([0.2000, 0.1000])
tensor([2.0000, 0.1000])
tensor([[1.0000, 2.0000],
        [0.2000, 0.1000]])
torch.Size([2, 2])
tensor([[[1.0000, 2.0000],
         [0.2000, 0.1000]]])
torch.Size([1, 2, 2])
tensor([[1.0000, 2.0000],
        [0.2000, 0.1000]])
torch.Size([2, 2])


In [25]:
# BROADCASTING SEMANTICS
# Two tensors are broadcastable when both of these rules hold:
#   1. each tensor has at least one dimension;
#   2. walking the dimension sizes from the trailing dimension backwards, the
#      two sizes are equal, or one of them is 1, or one of them does not exist.
x = torch.empty(5, 7, 3)
y = torch.empty(5, 7, 3)
# same shapes are always broadcastable (both rules hold trivially)

x = torch.empty((0,))
y = torch.empty(2, 2)
# x and y are not broadcastable: the trailing dimension sizes are 0 and 2,
# which are neither equal nor 1

# can line up trailing dimensions
x = torch.empty(5, 3, 4, 1)
y = torch.empty(   3, 1, 1)
# x and y are broadcastable.
# 1st trailing dimension: both have size 1
# 2nd trailing dimension: y has size 1
# 3rd trailing dimension: x size == y size
# 4th trailing dimension: y dimension doesn't exist

# but:
x = torch.empty(5, 2, 4, 1)
y = torch.empty(   3, 1, 1)
# x and y are not broadcastable, because in the 3rd trailing dimension 2 != 3

In [28]:
# EINSUM
# 'i,j->ij' keeps both indices and sums over nothing, i.e. out[i, j] = x[i] * y[j].
x = torch.randn(5)
y = torch.randn(5)
for vector in (x, y):
    print(vector)
outer_product = torch.einsum('i,j->ij', x, y)
outer_product

tensor([-2.0665, -0.9771, -0.0890, -0.8344,  0.2659])
tensor([-2.6436,  0.5505,  0.5054, -0.3978,  0.5616])


tensor([[ 5.4631, -1.1376, -1.0445,  0.8220, -1.1605],
        [ 2.5832, -0.5379, -0.4939,  0.3887, -0.5488],
        [ 0.2353, -0.0490, -0.0450,  0.0354, -0.0500],
        [ 2.2058, -0.4593, -0.4217,  0.3319, -0.4686],
        [-0.7028,  0.1464,  0.1344, -0.1057,  0.1493]])

In [27]:
# 'bn,anm,bm->ba' sums over n and m:
#   out[b, a] = sum_{n, m} l[b, n] * A[a, n, m] * r[b, m]
A = torch.rand((3, 5, 4))  # indexed as A[a, n, m]
l = torch.rand((2, 5))     # indexed as l[b, n]
r = torch.rand((2, 4))     # indexed as r[b, m]
bilinear = torch.einsum('bn,anm,bm->ba', l, A, r)
bilinear

tensor([[1.5859, 1.7501, 2.1571],
        [2.0135, 2.5645, 2.4143]])