<a href="https://colab.research.google.com/github/rexbrandy/Neural_Networks/blob/main/batch_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Batch Training

In this notebook we create a basic custom `Dataset` class to better understand how datasets and dataloaders are used.

A dataloader splits a dataset into smaller batches that can be iterated over. By running the forward and backward pass on one batch at a time, we compute gradients and update the weights more frequently than once per full pass over the data, which usually speeds up training.

### Terms to know
 - epoch: 1 forward and backward pass of ALL training samples
 - batch_size: number of training samples in one forward and backward pass
 - number of iterations: number of passes, each pass using [batch_size] number of samples

e.g. with 100 samples and batch_size=20 -> 100/20 = 5 iterations for 1 epoch

In [5]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

class WineDataset(Dataset):
    """Example of a custom dataset that inherits ``torch.utils.data.Dataset``.

    The data is read from a comma-separated file whose first line is skipped
    as a header. Column 0 of every row is stored in ``y`` and the remaining
    columns are stored in ``x``.

    Attributes:
        x: Float32 tensor holding columns 1..end of every row.
        y: Float32 tensor holding column 0 of every row, kept as a column
            (one value per row).
        n_samples: Number of data rows that were loaded.
    """

    def __init__(self, file_name=None):
        """Load the CSV file and split it into ``x`` and ``y`` tensors.

        Args:
            file_name: Path to the CSV file. When ``None`` the default
                ``./data/wine.csv`` is used.
        """
        if file_name is None:
            file_name = './data/wine.csv'

        # ndmin=2 keeps the result two-dimensional even when the file holds
        # a single data row; without it loadtxt returns a 1-D array and the
        # column slicing below raises IndexError.
        xy = np.loadtxt(
            file_name, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2
        )
        self.x = torch.from_numpy(xy[:, 1:])   # every row, columns 1..end
        self.y = torch.from_numpy(xy[:, [0]])  # every row, column 0 kept 2-D
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at ``index``, enabling ``dataset[0]``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples, enabling ``len(dataset)``."""
        return self.n_samples

# Single source of truth for the batch size: it feeds both the DataLoader and
# the iteration count below, so the two cannot drift apart.
batch_size = 4

dataset = WineDataset()
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)

num_epochs = 2
total_samples = len(dataset)
# Round up: the last batch of an epoch may hold fewer than batch_size samples.
n_iterations = math.ceil(total_samples / batch_size)


# Dummy training loop: iterate over every batch in every epoch and report
# progress on every 5th step instead of doing a forward/backward pass.
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        if (i+1) % 5 == 0:
            print(f'epoch: {epoch+1}/{num_epochs}, step: {i+1}/{n_iterations}, inputs: {inputs.shape}')


epoch: 1/2, step: 5/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 10/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 15/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 20/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 25/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 30/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 35/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 40/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 45/45, inputs: torch.Size([2, 13])
epoch: 2/2, step: 5/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 10/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 15/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 20/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 25/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 30/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 35/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 40/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 45/45, inputs: torch.Size([2, 13])
