### DataLoader in PyTorch

In [None]:
# Some info: schematic of a training loop that walks over epochs and batches.
# This cell is illustrative only.

total_batches = 10 
# DataLoader and Dataset are what supply the batches in real code.

# INFO 
# training loop: the outer loop runs once per epoch
for epoch in range(1000):
    # loop over all batches of the current epoch
    for i in range(total_batches):
        # `...` is a placeholder standing in for "fetch the i-th batch"
        x_batch, y_batch = ...
'''
epoch = 1 complete forward and backward pass over ALL the training samples.

batch_size = number of training samples in one forward and backward pass

number of iterations = number of passes, each pass using [batch_size] number of samples

e.g. 100 samples, batch_size=20 ---> 100/20 = 5 iterations for 1 epoch
'''

'''
The number of iterations per epoch is calculated as ceil(number_of_samples / batch_size).

So if you have 1280 samples in your Dataset and set batch_size=128,

your DataLoader will return 10 batches of 128 samples each.

Therefore the iteration count increases by 10 per epoch.
'''

In [6]:
import torch
import torchvision 
from torch.utils.data import Dataset, DataLoader
import numpy as np 
import math 

In [9]:
class WineDataset(Dataset):
    """Wine dataset loaded from a CSV file and held in memory as tensors.

    The file is read with one header row skipped. Column 0 is used as the
    label; every remaining column is used as a feature.

    Args:
        path: Location of the CSV file. Defaults to the path this class
            originally hard-coded, so ``WineDataset()`` keeps working.
    """

    def __init__(self, path='data/wine/wine.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row, so the column slicing below always works.
        xy = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)
        # Features: all samples, every column after the first.
        self.x = torch.from_numpy(xy[:, 1:])
        # Labels: first column, indexed with a list to stay 2-D (n_samples, 1).
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index`` (enables ``dataset[i]``)."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples (enables ``len(dataset)``)."""
        return self.n_samples
    
dataset = WineDataset()
first_data = dataset[0]
# features, labels = first_data
# print(features, labels)

# Single source of truth for the batch size: it is used both to build the
# loader and to compute the number of iterations per epoch below.
batch_size = 4

# NOTE(review): num_workers > 0 starts worker processes; when this is run as a
# plain script on a platform that spawns processes, the code presumably needs
# an `if __name__ == "__main__":` guard -- confirm for your environment.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)


# Convert the loader to an iterator to look at a single batch.
# Uncomment to have a look at your data:
# dataiter = iter(dataloader)
# data = next(dataiter)  # use the builtin; the older dataiter.next() spelling fails on newer PyTorch

# features, labels = data
# print(features, labels)

# Dummy training loop
num_epochs = 10
total_samples = len(dataset)
# Round up: the final batch of an epoch may hold fewer than batch_size samples.
n_iterations = math.ceil(total_samples / batch_size)

print(n_iterations, total_samples)

for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        # The forward and backward pass would go here.
        # Report progress on every 5th step only, to keep the output short.
        if (i + 1) % 5 == 0:
            print('epoch: {}/{}, step: {}/{}, input= {}'.format(epoch + 1, num_epochs, i + 1, n_iterations, inputs.shape))


45 178
epoch: 1/10, step: 5/45, input= torch.Size([4, 13])
epoch: 1/10, step: 10/45, input= torch.Size([4, 13])
epoch: 1/10, step: 15/45, input= torch.Size([4, 13])
epoch: 1/10, step: 20/45, input= torch.Size([4, 13])
epoch: 1/10, step: 25/45, input= torch.Size([4, 13])
epoch: 1/10, step: 30/45, input= torch.Size([4, 13])
epoch: 1/10, step: 35/45, input= torch.Size([4, 13])
epoch: 1/10, step: 40/45, input= torch.Size([4, 13])
epoch: 1/10, step: 45/45, input= torch.Size([2, 13])
epoch: 2/10, step: 5/45, input= torch.Size([4, 13])
epoch: 2/10, step: 10/45, input= torch.Size([4, 13])
epoch: 2/10, step: 15/45, input= torch.Size([4, 13])
epoch: 2/10, step: 20/45, input= torch.Size([4, 13])
epoch: 2/10, step: 25/45, input= torch.Size([4, 13])
epoch: 2/10, step: 30/45, input= torch.Size([4, 13])
epoch: 2/10, step: 35/45, input= torch.Size([4, 13])
epoch: 2/10, step: 40/45, input= torch.Size([4, 13])
epoch: 2/10, step: 45/45, input= torch.Size([2, 13])
epoch: 3/10, step: 5/45, input= torch.Siz