In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict
from tqdm import tqdm

In [18]:
# TODO: Add regularization, num_workers, CUDA concepts, sequential models, batch normalization

Data standardization is a specific type of normalization technique. It is sometimes referred to as z-score normalization. The z-score, a.k.a. standard score, is the transformed value for each data point.

To normalize a dataset using standardization, we take every value
inside the dataset and transform it to its corresponding value using the following formula:

z = (x - mean)/std

After performing this computation on every value inside our dataset, we have a new normalized dataset of values. The mean and standard deviation values are with respect to the dataset as a whole. 

<i>It's important to note that when we normalize a dataset, we typically group these operations by feature. This means that the mean and standard deviation values are relative to each feature set that's being normalized. If we are working with images, the features are the RGB color channels, so we normalize each color channel with respect to the mean and standard deviation values calculated across all pixels in every image for the respective color channel. In our case we only need to
normalize a single color channel.</i>

In [19]:
# Normalization usually happens in the extract and transform stages of the ETL process.
# torchvision exposes it through transforms.Normalize, which takes per-channel statistics:
# torchvision.transforms.Normalize(
#       [meanOfChannel1, meanOfChannel2, meanOfChannel3]
#     , [stdOfChannel1, stdOfChannel2, stdOfChannel3]
# )
# We don't yet have the mean and std of the channel we are working with, so we first load
# the raw (un-normalized) training set in order to calculate them.

train_set = torchvision.datasets.FashionMNIST(
    root='/home/slabban/machine_learning_courses/datasets',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

Moving forward we will start using the 'num_workers' argument in our dataloaders to increase the speed of our training runs. 
In a nutshell, 'num_workers' specifies the number of subprocesses that can be used to read the data from disk while the main process runs.
In the deeplizard course, the biggest improvement came when 1 worker was added, with diminishing returns as the number of workers increased.

This could be different for other cases, but we will stick to 1 num worker for the time being.

In [20]:
# When the whole dataset fits in memory, load it as one single batch and read the
# statistics straight off the resulting tensor:

loader = DataLoader(
    train_set,
    batch_size=len(train_set),
    num_workers=1,
)
images, labels = next(iter(loader))
images.mean(), images.std()

(tensor(0.2860), tensor(0.3530))

In [21]:
# Often we will be dealing with datasets too large to load in one go. To tackle that case we
# split the set into batches and implement the mean and std formulas by hand, in two passes.
# Note: this yields one global mean/std over every element (all channels pooled together).

loader = DataLoader(train_set, batch_size=1000, num_workers=1)

# Pass 1: accumulate the sum of all pixel values and count the pixels as we go.
# Counting from the batches themselves (rather than hard-coding 28 * 28) keeps the
# result correct for any image size or channel count.
num_of_pixels = 0
total_sum = 0
for images, labels in loader:
    num_of_pixels += images.numel()
    total_sum += images.sum()
mean = total_sum / num_of_pixels

# Pass 2: accumulate the squared deviation of every pixel from the mean.
sum_of_squared_error = 0
for images, labels in loader:
    sum_of_squared_error += ((images - mean).pow(2)).sum()

# Divides by N (population form); for a dataset this large the difference from the
# N - 1 form is not visible at the printed precision.
std = torch.sqrt(sum_of_squared_error / num_of_pixels)

mean, std

(tensor(0.2860), tensor(0.3530))

In [22]:
# With the statistics in hand, rebuild the training set so that normalization is part of
# our extract and transform steps:

train_set = torchvision.datasets.FashionMNIST(
    root='/home/slabban/machine_learning_courses/datasets',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
)

In [23]:
# After normalization, the mean and std of the dataset come out as 0 and 1 respectively
# (up to floating-point error)

loader = DataLoader(train_set, batch_size=len(train_set), num_workers=1)
data = next(iter(loader))
data[0].mean(), data[0].std()

(tensor(-9.3774e-08), tensor(1.))


PyTorch allows us to seamlessly move data to and from our GPU as we perform computations inside our programs.

When we go to the GPU, we can use the cuda() method, and when we go to the CPU, we can use the cpu() method.

We can also use the to() method. To go to the GPU, we write to('cuda') and to go to the CPU, we write to('cpu'). The to() method is the preferred way mainly because it is more flexible. We'll see one example using the first two, and then we'll default to always using the to() variant.

CPU 	GPU
cpu() 	cuda()
to('cpu') 	to('cuda')

To make use of our GPU during the training process, there are two essential requirements. These requirements are as follows, the data must be moved to the GPU, and the network must be moved to the GPU.

    Data on the GPU
    Network on the GPU

In [25]:
# Let's pull in the very familiar RunManager implementation.
# We are going to add a simple change at the 'add_graph' call of TensorBoard's 'SummaryWriter' instance that will make our RunManager class
# device agnostic: we use the built-in getattr() function to get the value of the device on the run object.
# If the run object doesn't have a device, then 'cpu' is returned. This makes the code backward compatible:
# it will still work if we don't specify a device for our run.

# I will also add some flexibility to the class to allow us to disable TensorBoard for file management

class RunManager():
    """Collects per-epoch training statistics across runs, optionally mirroring them to TensorBoard.

    Typical call order per run: begin_run -> (begin_epoch -> track_loss /
    track_num_correct per batch -> end_epoch) per epoch -> end_run, then save().

    Args:
        tensorboard: when True, a SummaryWriter is created for every run and
            scalars/histograms are written at the end of each epoch. When False
            (default) no TensorBoard files are produced.
    """

    def __init__(self, tensorboard=False):
        # TODO: extract epoch && run variables into individual classes
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        self.network = None
        self.loader = None

        self.istb = tensorboard
        self.tb = None

        self.tqdm_epoch = None

    def begin_run(self, run, network, loader):
        """Start a new run: record its hyper-parameters, network and loader.

        Args:
            run: namedtuple-like object of hyper-parameters (``_asdict()`` is
                called on it in end_epoch). An optional ``device`` attribute is
                used when tracing the network graph for TensorBoard.
            network: the model being trained.
            loader: the DataLoader iterated over during each epoch.
        """
        self.run_start_time = time.time()
        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader

        # The sample batch and image grid are only needed for TensorBoard, so skip
        # the (potentially expensive) batch fetch when TensorBoard is disabled.
        if self.istb:
            images, labels = next(iter(self.loader))
            grid = torchvision.utils.make_grid(images)
            self._create_tb(run, grid, images)

    def _create_tb(self, run, grid, images):
        """Open a SummaryWriter for this run and log a sample image grid and the network graph."""
        self.tb = SummaryWriter(comment=f'-{run}')
        self.tb.add_image('images', grid)
        # Fall back to 'cpu' when the run does not specify a device (backward compatible).
        self.tb.add_graph(self.network, images.to(getattr(run, 'device', 'cpu')))

    def end_run(self):
        """Finish the current run: close TensorBoard (if enabled) and reset the epoch counter."""
        if self.istb:
            self._close_tb()
        self.epoch_count = 0

    def _close_tb(self):
        """Close the current SummaryWriter."""
        self.tb.close()

    def begin_epoch(self):
        """Reset the per-epoch accumulators and create a tqdm progress iterator over the loader."""
        self.epoch_start_time = time.time()
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.tqdm_epoch = tqdm(self.loader, unit="batch")

    def end_epoch(self):
        """Compute epoch metrics, log them, append them to run_data and display the results table."""
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        # epoch_loss is a sum of per-batch mean losses weighted by batch size
        # (see track_loss), so dividing by the dataset size gives a per-sample mean.
        num_samples = len(self.loader.dataset)
        loss = self.epoch_loss / num_samples
        accuracy = self.epoch_num_correct / num_samples

        if self.istb:
            # Bound-method call: `self` must not be passed again explicitly.
            self._plot_tb(loss, accuracy)

        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results['loss'] = loss
        results["accuracy"] = accuracy
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration
        # Append the run's hyper-parameters so every row is self-describing.
        for key, val in self.run_params._asdict().items():
            results[key] = val
        self.run_data.append(results)

        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        # Replace the previously displayed table instead of stacking a new one per epoch.
        clear_output(wait=True)
        display(df)

    def _plot_tb(self, loss, accuracy):
        """Write the epoch's loss/accuracy scalars and parameter histograms to TensorBoard."""
        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # Parameters that never received a gradient have grad=None;
            # there is nothing to plot for them.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

    def track_loss(self, loss, batch):
        """Accumulate a batch's loss, weighted by the number of samples in batch[0]."""
        self.epoch_loss += loss.item() * batch[0].shape[0]

    @torch.no_grad()
    def track_num_correct(self, preds, labels):
        """Accumulate the number of correct predictions in this batch."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    def _get_num_correct(self, preds, labels):
        """Return how many rows of preds have their argmax (over dim 1) equal to the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write the collected run data to '<fileName>.csv' and '<fileName>.json'."""
        pd.DataFrame.from_dict(
            self.run_data, orient='columns'
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)
    
        