<font color='blue'><h1>Introdução Prática ao PyTorch</h1></font>
<font color='blue'><h2>Datasets e Dataloaders em Pytorch</h2></font>

- Desacoplamento do código do dataset do código do treinamento
- Duas primitivas no PyTorch:
  - torch.utils.data.Dataset
  - torch.utils.data.DataLoader

#### Exemplo 1

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [7]:
import torch
from torch.utils.data import TensorDataset, DataLoader

In [9]:
# Load the Iris dataset and pull out the feature matrix and the label vector.
iris = load_iris()
X = iris.data
y = iris.target
print(X.shape, y.shape)

(150, 4) (150,)


In [10]:
# Convert the NumPy arrays to tensors: float32 features and int64 class labels.
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.int64)

In [12]:
# Define the dataset from tensors: TensorDataset pairs the feature tensor with
# the label tensor so that indexing it yields one (features, label) tuple.
train_dataset = TensorDataset(X_tensor, y_tensor)
train_dataset

<torch.utils.data.dataset.TensorDataset at 0x7674e00c6250>

In [23]:
# Build the DataLoader: mini-batches of 10 samples, reshuffled at every epoch.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=10, shuffle=True)

In [24]:
len(train_dataloader) # number of iterations (batches) needed to go through all the data

15

In [25]:
# Iterate over the DataLoader; every element is one (inputs, labels) mini-batch.
# The counter is printed 1-based.
for count, data in enumerate(train_dataloader):
    inputs, labels = data
    print(f"{count + 1} {inputs.shape} {labels.shape}")

1 torch.Size([10, 4]) torch.Size([10])
2 torch.Size([10, 4]) torch.Size([10])
3 torch.Size([10, 4]) torch.Size([10])
4 torch.Size([10, 4]) torch.Size([10])
5 torch.Size([10, 4]) torch.Size([10])
6 torch.Size([10, 4]) torch.Size([10])
7 torch.Size([10, 4]) torch.Size([10])
8 torch.Size([10, 4]) torch.Size([10])
9 torch.Size([10, 4]) torch.Size([10])
10 torch.Size([10, 4]) torch.Size([10])
11 torch.Size([10, 4]) torch.Size([10])
12 torch.Size([10, 4]) torch.Size([10])
13 torch.Size([10, 4]) torch.Size([10])
14 torch.Size([10, 4]) torch.Size([10])
15 torch.Size([10, 4]) torch.Size([10])


In [26]:
# Fetch a single batch without writing a loop: build an iterator over the
# DataLoader and take its first element.
inputs, labels = next(iter(train_dataloader))

#### Exemplo 2

In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import torch

In [2]:
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
X, y = data.data, data.target
print(X.shape, y.shape)

# Split into train/test sets BEFORE scaling, so that the scaling statistics
# below are computed from the training portion only (no test-set leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

# Min-max scale every feature (column) independently. A single global min/max
# over the whole matrix would let the largest-magnitude features dominate and
# squash all the others towards 0.
feature_min = X_train.min(axis=0)
feature_range = X_train.max(axis=0) - feature_min
feature_range[feature_range == 0] = 1.0  # constant feature: avoid division by zero

X_train = (X_train - feature_min) / feature_range
# Test values may fall slightly outside [0, 1]; that is expected because the
# statistics come from the training split.
X_test = (X_test - feature_min) / feature_range
# Keep X bound to scaled data, as before, for any later cell that reads it.
X = (X - feature_min) / feature_range

X_train.shape, X_test.shape, y_train.shape, y_test.shape

(569, 30) (569,)


((381, 30), (188, 30), (381,), (188,))

In [3]:
class TrainDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        X: array-like of features; converted to a float32 tensor.
        y: array-like of labels, one per entry of ``X``; converted to a
            float32 tensor.

    Raises:
        ValueError: if ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(np.array(X), dtype=torch.float)
        self.y = torch.tensor(np.array(y), dtype=torch.float)
        # Fail fast on mismatched inputs. Without this check, extra labels are
        # silently ignored (length is taken from X) and missing labels only
        # surface as an IndexError in the middle of an epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

class TestDataset(torch.utils.data.Dataset):
    """Map-style dataset that holds features only (no labels)."""

    def __init__(self, X):
        # Go through NumPy first, then build a float32 tensor from the array.
        features = np.array(X)
        self.X = torch.tensor(features, dtype=torch.float32)

    def __getitem__(self, index):
        """Return the feature tensor stored at ``index``."""
        sample = self.X[index]
        return sample

    def __len__(self):
        """Return the number of stored samples."""
        n_samples = len(self.X)
        return n_samples

In [4]:
def build_train_data_loader(X_train, y_train, batch_size, shuffle=True):
  '''
  Build a DataLoader that serves the training set in mini-batches.

  Args:
    X_train: array-like of training features; converted to a float32 tensor.
    y_train: array-like of training labels; converted to a float32 tensor.
    batch_size: number of samples per mini-batch.
    shuffle: whether the samples are reshuffled at every epoch. Defaults to
      True, which is what this function always did; pass False to iterate
      in the original sample order.

  Returns:
    A torch.utils.data.DataLoader over a TensorDataset whose items are
    (features, label) pairs.
  '''

  # Convert the data from NumPy arrays to PyTorch tensors.
  X_train_tensor = torch.tensor(np.array(X_train), dtype=torch.float)
  y_train_tensor = torch.tensor(np.array(y_train), dtype=torch.float)

  # Wrap both tensors in a TensorDataset so indexing yields (features, label).
  train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)

  # The DataLoader takes care of splitting the dataset into mini-batches.
  train_loader = torch.utils.data.DataLoader(
      train_dataset, batch_size=batch_size, shuffle=shuffle)

  return train_loader

In [6]:
# Mini-batch loader over the custom TrainDataset: 16 samples per batch,
# reshuffled at every epoch.
train_loader = torch.utils.data.DataLoader(
    TrainDataset(X_train, y_train), batch_size=16, shuffle=True
)

In [7]:
# Network training (skeleton): the loops below only iterate over the data.
num_epochs = 10
for epoch in range(num_epochs):
    # Walk through every mini-batch of the training set.
    count_batch = 0  # reset at the start of each epoch; never updated in this loop
    for inputs, labels in train_loader:
        pass