# Load Packages

In [None]:
# Import sklearn
import sklearn
from sklearn.datasets import load_files 

# Import torch 
import torch
import torch.nn as nn
from torch import manual_seed as torch_manual_seed
from torch import Generator
from torch.optim import Adam
from torch.cuda import max_memory_allocated, set_device, manual_seed_all
from torch.backends import cudnn
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import ToTensor, transforms
from torchvision.datasets import ImageFolder
from torchvision import datasets, models, transforms
import torch.nn.functional as nnf

# Import other packages
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import sys
import os
import PIL
import PIL.Image
import random
from functools import partial
from timeit import default_timer as timer

# Random Seed for Reproducibility

In [None]:
def setup_seed(seed):
    """Seed every random number generator used here and make cuDNN repeatable.

    Seeds PyTorch (through ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``), NumPy and Python's ``random`` module with
    the same value, then configures cuDNN for deterministic execution.

    Args:
        seed: Integer seed handed to every generator.
    """
    torch_manual_seed(seed)
    manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick a different convolution
    # algorithm from run to run; switch it off so that the deterministic flag
    # above is not undermined by a setting enabled earlier in the session.
    cudnn.benchmark = False


SEED = 42
setup_seed(SEED)

# Import Data

In [None]:
# Root folder of the image dataset; it is passed to ImageFolder when the data is loaded.
data_dir = "data/"

In [None]:
# Load every image found under data_dir; each one is converted to a tensor on access
dataset_all = ImageFolder(data_dir, transform=ToTensor())

# Report the size of the dataset before it is partitioned
size_all = len(dataset_all)
print(f'Before splitting the full dataset into train and test: len(dataset_all) = {size_all}')

# 10% of the samples go to test and 20% to validation (both rounded down);
# the training subset receives whatever remains, so the three sizes sum to size_all
size_test = int(size_all * 0.10)
size_val = int(size_all * 0.20)
size_train = size_all - size_test - size_val

# A dedicated generator seeded with SEED drives the random split
split_generator = Generator().manual_seed(SEED)
dataset_test, dataset_val, dataset_train = random_split(
    dataset_all,
    [size_test, size_val, size_train],
    generator=split_generator,
)

# Report the size of each resulting subset
print(
    'After splitting the full dataset into test, validation, and training: \n '
    f'      len(dataset_test) = {len(dataset_test)} \n '
    f'      len(dataset_val) = {len(dataset_val)} \n '
    f'      len(dataset_train) = {len(dataset_train)}'
)

In [None]:
from torchvision.transforms.functional import to_pil_image

# Preview the first samples of the training subset in a grid of images.
# class_names is indexed with the integer target of each sample, so its order
# has to follow the label indices assigned by the dataset
# (NOTE(review): confirm against dataset_all.class_to_idx).
class_names = ['02', '03', '04', '05', '08', '10']

# 8 rows x 4 columns = 32 images; the original note sized this to one training batch (32)
n_rows = 8
n_cols = 4
plt.figure(figsize=(n_cols * 3, n_rows * 3))

# Walk the grid cells in row-major order; cell i shows training sample i
for index in range(n_rows * n_cols):
    plt.subplot(n_rows, n_cols, index + 1)
    image, target = dataset_train[index]
    # Samples are tensors, so convert back to a PIL image for display
    plt.imshow(to_pil_image(image))
    plt.axis('off')
    plt.title(class_names[target], fontsize=12)

plt.subplots_adjust(wspace=.2, hspace=.2)
plt.show()