# Notes and Tests

This is a Jupyter notebook in which different techniques and tests are tried out interactively, REPL-style.

### Import libraries

In [93]:
from torchvision.datasets import ImageFolder
from torchvision import transforms as tf
from torch.utils.data import random_split, DataLoader
import os
import torch

### Test

In [121]:
# Print the signature and docstring of random_split.
help(random_split)

Help on function random_split in module torch.utils.data.dataset:

random_split(dataset: torch.utils.data.dataset.Dataset[~T], lengths: Sequence[int], generator: Optional[torch._C.Generator] = <torch._C.Generator object at 0x7f4fd84c2810>) -> List[torch.utils.data.dataset.Subset[~T]]
    Randomly split a dataset into non-overlapping new datasets of given lengths.
    Optionally fix the generator for reproducible results, e.g.:
    
    >>> random_split(range(10), [3, 7], generator=torch.Generator().manual_seed(42))
    
    Arguments:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths of splits to be produced
        generator (Generator): Generator used for the random permutation.



In [104]:
# Build the train / validation / test subsets and their DataLoaders from a
# directory read by ImageFolder.
dataset_folder = "../../dataset_ufpr/cleaned"
SPLIT_SEED = 42  # keeps the train/valid/test partition identical across kernel restarts

# NOTE(review): this random augmentation pipeline is attached to the single
# ImageFolder instance below, so the validation and test subsets are augmented
# as well — confirm that this is intended.
transform = tf.Compose([
    tf.RandomResizedCrop(64),
    tf.RandomRotation(180),
    tf.PILToTensor(),
    tf.ConvertImageDtype(torch.float64),
    tf.Normalize(mean=[0.5], std=[0.5]),
])
dataset = ImageFolder(dataset_folder, transform=transform)
classes = dataset.classes

# Size the splits from the samples the dataset actually holds. Counting files
# on disk can disagree with len(dataset) (any file ImageFolder does not index
# is still counted), and random_split rejects lengths that do not sum to
# len(dataset).
nb_images = len(dataset)
train_count = nb_images * 2 // 3  # two thirds for training
valid_count = nb_images // 6      # one sixth for validation
test_count = nb_images - train_count - valid_count  # remainder, so the counts always sum up
train_dataset, valid_dataset, test_dataset = random_split(
    dataset,
    (train_count, valid_count, test_count),
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

batch_size = 128
datasets = {
    "train": train_dataset,
    "val": valid_dataset,
    "test": test_dataset,
}
# One loader per split, all configured identically.
dataloaders = {
    split: DataLoader(
        subset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
    )
    for split, subset in datasets.items()
}
# Individual loader names kept for cells that reference them directly.
train_dataset_loader = dataloaders["train"]
valid_dataset_loader = dataloaders["val"]
test_dataset_loader = dataloaders["test"]