# PyTorch Dataset and Dataloader Classes

It is more efficient to train in batches, updating the weights after each batch,
than to compute a single update over the entire dataset.

- epoch = 1 forward and backward pass of ALL training samples

- batch_size = number of training samples in one forward and backward pass

- number of iterations = number of passes per epoch, each pass using batch_size samples

e.g. 100 samples, batch_size=20 -> 100/20 = 5 iterations for 1 epoch

In [1]:
# training loop
# for each epoch, loop over all the batches

In [9]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

class WineDataset(Dataset):
    """Map-style dataset backed by a CSV file loaded fully into memory.

    The file must be comma-separated with exactly one header row (which is
    skipped). Column 0 is used as the target and every remaining column as a
    feature. All values are read as ``float32``.

    Args:
        path: Location of the CSV file. Defaults to ``'wine.csv'`` so that
            ``WineDataset()`` keeps loading the same file as before.

    Attributes are plain tensors:
        x: features, shape ``(n_samples, n_columns - 1)``
        y: targets, shape ``(n_samples, 1)``
        n_samples: number of data rows in the file
    """

    def __init__(self, path='wine.csv'):
        super().__init__()

        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it np.loadtxt returns a 1-D array and the
        # column slicing below raises IndexError.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)

        # Split into features (all columns but the first) and targets (first
        # column). Indexing with [0] instead of 0 keeps y as a column vector.
        self.x = torch.from_numpy(xy[:, 1:])
        self.y = torch.from_numpy(xy[:, [0]])

        # The first dimension is the number of samples.
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self) -> int:
        """Return the number of samples in the dataset."""
        return self.n_samples
    
dataset = WineDataset()


# Wrap the dataset in a DataLoader so it is served in shuffled mini-batches.
# batch_size is named once so the loader and the iteration count below
# cannot drift apart.
batch_size = 4
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)

# Peek at a single batch. The built-in next() is used because the
# Python-2-style `.next()` method is not available on DataLoader iterators
# in current PyTorch releases.
dataiter = iter(dataloader)
data = next(dataiter)
features, labels = data
print(features, labels)

# Dummy training loop: iterates over the batches without doing any training.
num_epochs = 2
total_samples = len(dataset)
# The last batch may be smaller than batch_size, hence the ceiling.
n_iterations = math.ceil(total_samples / batch_size)
print(total_samples, n_iterations)

for epoch in range(num_epochs):
    # One pass over the loader is one epoch.
    for i, (inputs, labels) in enumerate(dataloader):
        # A real loop would do: forward pass, backward pass, weight update.
        # Report progress every 5th step only.
        if (i + 1) % 5 == 0:
            print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_iterations}, inputs {inputs.shape}')

tensor([[1.2040e+01, 4.3000e+00, 2.3800e+00, 2.2000e+01, 8.0000e+01, 2.1000e+00,
         1.7500e+00, 4.2000e-01, 1.3500e+00, 2.6000e+00, 7.9000e-01, 2.5700e+00,
         5.8000e+02],
        [1.3360e+01, 2.5600e+00, 2.3500e+00, 2.0000e+01, 8.9000e+01, 1.4000e+00,
         5.0000e-01, 3.7000e-01, 6.4000e-01, 5.6000e+00, 7.0000e-01, 2.4700e+00,
         7.8000e+02],
        [1.1660e+01, 1.8800e+00, 1.9200e+00, 1.6000e+01, 9.7000e+01, 1.6100e+00,
         1.5700e+00, 3.4000e-01, 1.1500e+00, 3.8000e+00, 1.2300e+00, 2.1400e+00,
         4.2800e+02],
        [1.3490e+01, 3.5900e+00, 2.1900e+00, 1.9500e+01, 8.8000e+01, 1.6200e+00,
         4.8000e-01, 5.8000e-01, 8.8000e-01, 5.7000e+00, 8.1000e-01, 1.8200e+00,
         5.8000e+02]]) tensor([[2.],
        [3.],
        [2.],
        [3.]])
178 45
epoch 1/2, step 5/45, inputs torch.Size([4, 13])
epoch 1/2, step 10/45, inputs torch.Size([4, 13])
epoch 1/2, step 15/45, inputs torch.Size([4, 13])
epoch 1/2, step 20/45, inputs torch.Size([4, 13])
