<a href="https://colab.research.google.com/github/rodrigocan/made-with-ml/blob/main/notebooks/05_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch

Neste notebook nós vamos aprender o básico do PyTorch, que é uma biblioteca de aprendizado de máquina utilizada para construir redes neurais dinâmicas. Aprenderemos coisas como a criação e utilização de tensores.

# Configuração

In [139]:
import numpy as np
import torch
import random

In [140]:
# Single seed value passed to every random number generator seeded in this notebook
SEED = 1234

In [141]:
# Seed every random number generator in use so that the results can be
# reproduced in another environment.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Seed every GPU rather than only the current one; the call is silently
# ignored when CUDA is not available.
torch.cuda.manual_seed_all(SEED)
# cuDNN: restrict to deterministic kernels and turn off the auto-tuner, which
# may otherwise select a different algorithm from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Básico

In [142]:
# Create a random tensor: randn samples from a normal distribution
# (rand(2, 3) would sample from a uniform distribution instead)
x = torch.randn((2, 3))
print(
    f"Type: {x.type()}",
    f"Size: {x.shape}",
    f"Values: \n{x}",
    sep="\n",
)

Type: torch.FloatTensor
Size: torch.Size([2, 3])
Values: 
tensor([[ 0.0461,  0.4024, -1.0115],
        [ 0.2167, -0.6123,  0.5036]])


In [143]:
# Tensors filled with zeros, then with ones (shape given as a tuple)
x = torch.zeros((2, 3))
print(x)
x = torch.ones((2, 3))
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])


In [144]:
# List -> Tensor
# torch.tensor is the recommended factory for existing data. The explicit
# dtype keeps the floating-point result shown below, since integer literals
# would otherwise be inferred as int64.
x = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)
print(f"Size: {x.shape}")
print(f"Values: \n{x}")

Size: torch.Size([2, 3])
Values: 
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [145]:
# NumPy array -> Tensor
# np.random.rand yields float64 values; from_numpy wraps the array and
# .float() makes the conversion to float32 explicit instead of relying on the
# legacy torch.Tensor constructor.
x = torch.from_numpy(np.random.rand(2, 3)).float()
print(f"Size: {x.shape}")
print(f"Values: \n{x}")

Size: torch.Size([2, 3])
Values: 
tensor([[0.1915, 0.6221, 0.4377],
        [0.7854, 0.7800, 0.2726]])


In [146]:
# Changing the tensor type
# Start from an initialized tensor: torch.Tensor(3, 4) only allocates memory,
# so casting it would read arbitrary, run-dependent values.
x = torch.zeros(3, 4)
print(f"Type: {x.type()}")
x = x.long()  # float32 -> int64
print(f"Type: {x.type()}")

Type: torch.FloatTensor
Type: torch.LongTensor


# Operações

In [147]:
# Element-wise addition of two tensors with the same shape
x = torch.randn(2, 3)
y = torch.randn(2, 3)
z = torch.add(x, y)
print(f"Size: {z.shape}", f"Values: \n{z}", sep="\n")

Size: torch.Size([2, 3])
Values: 
tensor([[ 0.0761, -0.6775, -0.3988],
        [ 3.0633, -0.1589,  0.3514]])


In [148]:
# Matrix multiplication: (2, 3) x (3, 2) -> (2, 2)
x = torch.randn(2, 3)
y = torch.randn(3, 2)
z = torch.mm(x, y)
# The label previously contained a stray "z" and printed "Size: ztorch.Size(...)"
print(f"Size: {z.shape}")
print(f"Values: \n{z}")

Size: ztorch.Size([2, 2])
Values: 
tensor([[ 1.0796, -0.0759],
        [ 1.2746, -0.5134]])


In [149]:
# Transpose of a 2-D tensor
x = torch.randn(2, 3)
print(f"Size: {x.shape}", f"Values: \n{x}", sep="\n")
y = x.t()
print(f"Size: {y.shape}", f"Values: \n{y}", sep="\n")

Size: torch.Size([2, 3])
Values: 
tensor([[ 0.8042, -0.1383,  0.3196],
        [-1.0187, -1.3147,  2.5228]])
Size: torch.Size([3, 2])
Values: 
tensor([[ 0.8042, -1.0187],
        [-0.1383, -1.3147],
        [ 0.3196,  2.5228]])


In [150]:
# Reshape: view the same six values as 3 rows of 2
x = torch.randn(2, 3)
z = x.view(-1, 2)  # -1 lets PyTorch infer the first dimension (3)
print(f"Size: {z.shape}", f"Values: \n{z}", sep="\n")

Size: torch.Size([3, 2])
Values: 
tensor([[ 0.4501,  0.2709],
        [-0.8087, -0.0217],
        [-1.0413,  0.0702]])


In [151]:
# Pitfalls of reshaping (unintended consequences)
x = torch.tensor([
    [[1] * 4, [2] * 4, [3] * 4],
    [[10] * 4, [20] * 4, [30] * 4],
])
print(f"Size: {x.shape}")
print(f"x: \n{x}\n")

# Viewing directly as (3, 8) walks the elements in their stored order, so
# values coming from the two outer slices end up sharing a row
a = x.view(x.shape[1], -1)
print(f"\nSize: {a.shape}")
print(f"a: \n{a}\n")

# Swap the first two dimensions first; contiguous() lays the result out in
# that new order so that it can be viewed afterwards
b = torch.transpose(x, 0, 1).contiguous()
print(f"\nSize: {b.shape}")
print(f"b: \n{b}\n")

# Now each row groups the values that share the same index along the
# original second dimension
c = b.view(b.shape[0], -1)
print(f"\nSize: {c.shape}")
print(f"c: \n{c}\n")

Size: torch.Size([2, 3, 4])
x: 
tensor([[[ 1,  1,  1,  1],
         [ 2,  2,  2,  2],
         [ 3,  3,  3,  3]],

        [[10, 10, 10, 10],
         [20, 20, 20, 20],
         [30, 30, 30, 30]]])


Size: torch.Size([3, 8])
a: 
tensor([[ 1,  1,  1,  1,  2,  2,  2,  2],
        [ 3,  3,  3,  3, 10, 10, 10, 10],
        [20, 20, 20, 20, 30, 30, 30, 30]])


Size: torch.Size([3, 2, 4])
b: 
tensor([[[ 1,  1,  1,  1],
         [10, 10, 10, 10]],

        [[ 2,  2,  2,  2],
         [20, 20, 20, 20]],

        [[ 3,  3,  3,  3],
         [30, 30, 30, 30]]])


Size: torch.Size([3, 8])
c: 
tensor([[ 1,  1,  1,  1, 10, 10, 10, 10],
        [ 2,  2,  2,  2, 20, 20, 20, 20],
        [ 3,  3,  3,  3, 30, 30, 30, 30]])



In [152]:
# Reductions along a dimension
x = torch.randn(2, 3)
print(f"Values: \n{x}")
y = x.sum(dim=0)  # adds the rows together: one total per column
print(f"Values: \n{y}")
z = x.sum(dim=1)  # adds the columns together: one total per row
print(f"Values: \n{z}")

Values: 
tensor([[ 0.5797, -0.0599,  0.1816],
        [-0.6797, -0.2567, -1.8189]])
Values: 
tensor([-0.1000, -0.3166, -1.6373])
Values: 
tensor([ 0.7013, -2.7553])


# *Indexing*, *Splicing* e *Slicing*

Agora veremos como extrair, separar e juntar valores de nossos tensores.

In [153]:
# Slicing: first row only, then columns 1 and 2 of that first row
x = torch.randn(3, 4)
print(
    f"x: \n{x}",
    f"x[:1]: \n{x[:1]}",
    f"x[:1, 1:3]: \n{x[:1, 1:3]}",
    sep="\n",
)

x: 
tensor([[ 0.2111,  0.3372,  0.6638,  1.0397],
        [ 1.8434,  0.6588, -0.2349, -0.0306],
        [ 1.7462, -0.0722, -1.6794, -1.7010]])
x[:1]: 
tensor([[0.2111, 0.3372, 0.6638, 1.0397]])
x[:1, 1:3]: 
tensor([[0.3372, 0.6638]])


In [154]:
# Selecting with index tensors
x = torch.randn(2, 3)
print(f"Values: \n{x}")

# torch.tensor infers int64 from integer data, so the legacy
# torch.LongTensor constructor is not needed
col_indices = torch.tensor([0, 2])
chosen = torch.index_select(x, dim=1, index=col_indices)  # columns 0 and 2 of every row
print(f"Values: \n{chosen}")

# Pairing row and column indices picks individual elements; col_indices is
# reused from above rather than being rebuilt with the same values
row_indices = torch.tensor([0, 1])
chosen = x[row_indices, col_indices]  # values at (0, 0) and (1, 2)
print(f"Values: \n{chosen}")

Values: 
tensor([[ 0.6486,  1.7653,  1.0812],
        [ 1.2436,  0.8971, -0.0784]])
Values: 
tensor([[ 0.6486,  1.0812],
        [ 1.2436, -0.0784]])
Values: 
tensor([ 0.6486, -0.0784])


In [155]:
# Concatenation
x = torch.randn(2, 3)
print(f"Values: \n{x}")
# dim=0 stacks along the rows (dim=1 would join along the columns)
y = torch.cat((x, x), dim=0)
print(f"Values: \n{y}")

Values: 
tensor([[ 0.5548, -0.0845,  0.5903],
        [-1.0032, -1.7873,  0.0538]])
Values: 
tensor([[ 0.5548, -0.0845,  0.5903],
        [-1.0032, -1.7873,  0.0538],
        [ 0.5548, -0.0845,  0.5903],
        [-1.0032, -1.7873,  0.0538]])


# Gradientes

* $ y = 3x + 2 $
* $ z = \sum{y}/N $
* $ \frac{\partial z}{\partial x} = \frac{\partial z}{\partial y} \frac{\partial y}{\partial x} = \frac{1}{N} \cdot 3 = \frac{1}{12} \cdot 3 = 0.25 $, em que $ N = 3 \times 4 = 12 $ é o número de elementos de $ x $

In [156]:
# Tensors with gradient bookkeeping
x = torch.rand(3, 4, requires_grad=True)
y = x * 3 + 2
z = torch.mean(y)
z.backward()  # z has to be a scalar
print(f"x: \n{x}", f"x.grad: \n{x.grad}", sep="\n")

x: 
tensor([[0.7379, 0.0846, 0.4245, 0.9778],
        [0.6800, 0.3151, 0.3911, 0.8943],
        [0.6889, 0.8389, 0.1780, 0.6442]], requires_grad=True)
x.grad: 
tensor([[0.2500, 0.2500, 0.2500, 0.2500],
        [0.2500, 0.2500, 0.2500, 0.2500],
        [0.2500, 0.2500, 0.2500, 0.2500]])


# Tensores CUDA

In [157]:
# Is CUDA available?
print(torch.cuda.is_available())

False


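Se retornar False (CUDA não disponível), altere o ambiente de execução seguindo estas etapas: Ambiente de execução > Alterar o tipo de ambiente de execução > selecionar GPU como acelerador de hardware > Salvar. Essa alteração reinicia o ambiente de execução, por isso é preciso importar o `torch` novamente.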

In [1]:
import torch

In [2]:
# Is CUDA available now?
print(torch.cuda.is_available())

True


In [3]:
# Select the device: the GPU when CUDA is available, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [5]:
# Tensor created without specifying a device
x = torch.rand(2, 3)
print(x.is_cuda)
# Same kind of tensor, moved to the selected device (stored on the GPU when
# device is "cuda")
x = torch.rand(2, 3).to(device=device)
print(x.is_cuda)

False
True
