# 2. Working with data

In [None]:
import torch

## Dataset

In [None]:
from torch.utils.data import Dataset

### Example 1

In [None]:
class SimpleDataset(Dataset):
    """Toy dataset: 10 random scalars, each labelled by whether it exceeds 0.5."""

    def __init__(self):
        # One-dimensional tensor holding the 10 samples.
        self.data = torch.rand(10)

    def __len__(self):
        """Return the number of samples stored in ``self.data``."""
        return self.data.size(0)

    def __getitem__(self, index):
        """Return ``(sample, label)`` where ``label`` is ``sample > 0.5``."""
        value = self.data[index]
        return value, value.gt(0.5)

In [None]:
dt = SimpleDataset()

In [None]:
dt.data

### Example 2

In [None]:
!tree -nd alien-vs-predator

![predator](alien-vs-predator/train/predator/10.jpg)
![alien](alien-vs-predator/train/alien/10.jpg)

In [None]:
from pathlib import Path
from PIL import Image

class AlienPredatorDataset(Dataset):
    """Image dataset reading ``<root>/<split>/{alien,predator}/*.jpg``.

    Each item is an ``(image, target)`` pair: ``image`` is the file decoded
    as an RGB PIL image and ``target`` is 0 for "alien", 1 for "predator".

    Args:
        root: Directory containing one sub-directory per split.
        split: Name of the split sub-directory, e.g. ``"train"``.
    """

    # Position in this tuple is the integer target assigned to the class.
    CLASSES = ("alien", "predator")

    def __init__(self, root, split):
        self.root = root
        self.split = split

        # Collect (path, target) pairs up front; images are decoded lazily in
        # __getitem__. glob() yields files in arbitrary filesystem order, so
        # the paths are sorted to keep the index -> sample mapping
        # reproducible across runs and machines.
        self.img_instances = [
            (img_path, target)
            for target, class_name in enumerate(self.CLASSES)
            for img_path in sorted(Path(root, split, class_name).glob("*.jpg"))
        ]

    def __len__(self):
        """Return the number of images found for this split."""
        return len(self.img_instances)

    def __getitem__(self, index):
        """Load the image at ``index`` and return ``(img, target)``."""
        path, target = self.img_instances[index]

        # Open the file ourselves so the handle is closed when the block
        # exits; convert() is called while the file is still open.
        with open(path, 'rb') as f:
            img = Image.open(f).convert('RGB')

        return img, target

In [None]:
dt = AlienPredatorDataset("alien-vs-predator/", "train")

In [None]:
len(dt)

In [None]:
dt[0]

### Using torchvision ImageFolder

In [None]:
from torchvision.datasets import ImageFolder

In [None]:
# ImageFolder treats each immediate sub-directory of `root` as one class.
# The class folders (alien/, predator/) live under the split directory, so
# root must be the train/ split rather than the top-level dataset folder.
dataset = ImageFolder(root="alien-vs-predator/train")

In [None]:
dataset[0]

### Using torchvision's transforms

In [None]:
from torchvision.transforms import ToTensor, RandomHorizontalFlip, CenterCrop, ToPILImage

In [None]:
from torchvision.transforms.functional import to_tensor, hflip, center_crop, to_pil_image

In [None]:
# dt[0] is an (image, target) pair; keep only the image for the demos below.
img = dt[0][0]
# Bare expression on the last line so the notebook displays the image.
img

In [None]:
# Functional API: always flips, unlike the RandomHorizontalFlip transform.
hflip(img)

In [None]:
# Crop a 60x60 (height, width) region around the image centre.
center_crop(img, output_size=(60,60))

In [None]:
from torchvision.transforms import Compose

In [None]:
# Compose applies the transforms in order: first crop the central 100x100
# region, then flip the result horizontally at random.
transform = Compose((CenterCrop((100, 100)), RandomHorizontalFlip()))

### Apply transform to a dataset

In [None]:
# Hand the composed transform to the dataset so it is applied to every image
# on access. root is the train/ split because ImageFolder derives its classes
# from the immediate sub-directories of root (alien/, predator/).
dataset = ImageFolder(root="alien-vs-predator/train", transform=transform)

In [None]:
dataset[0]

## Dataloaders

In [None]:
from torch.utils.data import DataLoader

In [None]:
# Serve the dataset in batches of 5 samples, in shuffled order.
# NOTE(review): the `dataset` built above has no ToTensor step, so it yields
# PIL images; confirm a tensor-producing transform is added before iterating.
loader = DataLoader(dataset, batch_size=5, shuffle=True) #

## Building our training loop (2 / 5)

In [10]:
# INITIALIZATION

import torch
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, RandomCrop
from torchvision.datasets import ImageFolder

device = torch.device("cpu")

# RandomCrop cuts a 50x50 patch from each image, then ToTensor converts the
# PIL image into a tensor that the DataLoader can batch.
transform = Compose((RandomCrop((50, 50)), ToTensor()))
# root is the train/ split: ImageFolder derives the classes from the
# immediate sub-directories of root, and the class folders (alien/,
# predator/) sit under train/, not directly under alien-vs-predator/.
dataset = ImageFolder(root="alien-vs-predator/train", transform=transform)
loader = DataLoader(dataset, batch_size=5, shuffle=True)

In [11]:
# TRAINING LOOP

for batch in loader:
    # Each batch is a (samples, labels) pair; move both onto the target device.
    samples, labels = (tensor.to(device) for tensor in batch)
    # compute predictions with model
    # compute the loss
    # compute gradients
    # update model parameters