#### Dataset and Dataloaders
*epoch* = 1 forward and backward pass of all training samples  
*batch_size* = number of training samples in one forward and backward pass  
*number of iterations* = number of passes per epoch, each pass using *batch_size* samples  
e.g. 100 samples, batch size = 20 -> 100/20 = 5 iterations for 1 epoch

In [3]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [7]:
class WineDataset(Dataset):
    """Wine samples loaded from a comma-separated text file.

    The file must start with one header row (it is skipped). In every
    remaining row the first column is taken as the label and all other
    columns as features.

    Args:
        path: Location of the data file. Defaults to the path that used to be
            hard-coded, so ``WineDataset()`` keeps working unchanged.

    Attributes:
        x: float32 feature tensor of shape ``(n_samples, n_features)``.
        y: float32 label tensor of shape ``(n_samples, 1)``.
        n_samples: Number of data rows that were loaded.
    """

    def __init__(self, path='./data/wine/wine.txt'):
        super().__init__()
        # ndmin=2 keeps the array 2-D even when the file holds a single data
        # row; without it the column slicing below would raise IndexError.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.x = torch.from_numpy(xy[:, 1:])
        # Index with a list ([0]) so the labels keep shape (n_samples, 1).
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

In [8]:
# Build the dataset and inspect its first sample (indexing goes through
# WineDataset.__getitem__, which returns a (features, label) pair).
dataset = WineDataset()
first_data = dataset[0]
features, labels = first_data[0], first_data[1]
(features, labels)

(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]),
 tensor([1.]))

Create a DataLoader from the Dataset

In [14]:
# Wrap the dataset in a loader that serves shuffled mini-batches of 4 samples,
# fetched by 2 worker processes.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

A DataLoader is also an iterable, so `iter()` can be called on it to fetch batches one at a time

In [16]:
# IPython help query (not plain Python): displays the documentation of the built-in iter()
?iter

In [18]:
# Fetch a single batch by hand. iter() creates the loader's iterator and the
# built-in next() advances it; the Python-2-style `.next()` method is not
# available on the iterator in recent PyTorch releases.
data_iter = iter(dataloader)
data = next(data_iter)
# Each batch is a (features, labels) pair, stacked along a leading batch axis.
features, labels = data
features, labels

(tensor([[1.4100e+01, 2.1600e+00, 2.3000e+00, 1.8000e+01, 1.0500e+02, 2.9500e+00,
          3.3200e+00, 2.2000e-01, 2.3800e+00, 5.7500e+00, 1.2500e+00, 3.1700e+00,
          1.5100e+03],
         [1.3410e+01, 3.8400e+00, 2.1200e+00, 1.8800e+01, 9.0000e+01, 2.4500e+00,
          2.6800e+00, 2.7000e-01, 1.4800e+00, 4.2800e+00, 9.1000e-01, 3.0000e+00,
          1.0350e+03],
         [1.1840e+01, 8.9000e-01, 2.5800e+00, 1.8000e+01, 9.4000e+01, 2.2000e+00,
          2.2100e+00, 2.2000e-01, 2.3500e+00, 3.0500e+00, 7.9000e-01, 3.0800e+00,
          5.2000e+02],
         [1.3080e+01, 3.9000e+00, 2.3600e+00, 2.1500e+01, 1.1300e+02, 1.4100e+00,
          1.3900e+00, 3.4000e-01, 1.1400e+00, 9.4000e+00, 5.7000e-01, 1.3300e+00,
          5.5000e+02]]),
 tensor([[1.],
         [1.],
         [2.],
         [3.]]))

In [25]:
# dummy training loop, with data loader
num_epoch = 2
total_samples = len(dataset)
batch_size = 4
n_iters = math.ceil(total_samples/batch_size)
total_samples, n_iters

(178, 45)

In [26]:
for epoch in range(num_epoch):
    for i, (inputs, labels) in enumerate(dataloader):
        # dummy training loop: nothing is trained, we only log every 5th step
        if (i + 1) % 5 != 0:
            continue
        print(f'epoch {epoch+1}/{num_epoch}, step {i+1}/{n_iters}, inputs shape {inputs.shape}')

epoch 1/2, step 5/45, inputs shape torch.Size([4, 13])
epoch 1/2, step 10/45, inputs shape torch.Size([4, 13])
epoch 1/2, step 15/45, inputs shape torch.Size([4, 13])
epoch 1/2, step 20/45, inputs shape torch.Size([4, 13])
epoch 1/2, step 25/45, inputs shape torch.Size([4, 13])
epoch 1/2, step 30/45, inputs shape torch.Size([4, 13])
epoch 1/2, step 35/45, inputs shape torch.Size([4, 13])
epoch 1/2, step 40/45, inputs shape torch.Size([4, 13])
epoch 1/2, step 45/45, inputs shape torch.Size([2, 13])
epoch 2/2, step 5/45, inputs shape torch.Size([4, 13])
epoch 2/2, step 10/45, inputs shape torch.Size([4, 13])
epoch 2/2, step 15/45, inputs shape torch.Size([4, 13])
epoch 2/2, step 20/45, inputs shape torch.Size([4, 13])
epoch 2/2, step 25/45, inputs shape torch.Size([4, 13])
epoch 2/2, step 30/45, inputs shape torch.Size([4, 13])
epoch 2/2, step 35/45, inputs shape torch.Size([4, 13])
epoch 2/2, step 40/45, inputs shape torch.Size([4, 13])
epoch 2/2, step 45/45, inputs shape torch.Size([2, 13])

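Note that the last batch of an epoch may be smaller than *batch_size*: here 178 samples = 44 full batches of 4 plus a final batch of 2 (pass `drop_last=True` to the DataLoader to skip it).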