In [26]:
# computing gradients over the entire training set at once is time-consuming for large datasets
# so: divide the samples into smaller batches
# loop over epochs: loop over batches -> each optimization step is based on only one batch

In [27]:
# epoch = one forward and backward pass of ALL training samples
# batch_size = number of training samples used in one forward/backward pass
# number of iterations = number of passes per epoch, each pass (forward + backward) using [batch_size] samples
# e.g.: 100 samples, batch_size=20 -> 100/20 = 5 iterations for 1 epoch

In [28]:
import torch
import torchvision

from torch.utils.data import Dataset, DataLoader

import numpy as np
import math

In [29]:
# Dataset class
class WineDataset(Dataset):
    """Map-style dataset backed by a comma-separated wine data file.

    The first line of the file is skipped (treated as a header); every
    remaining row must be numeric. Column 0 is used as the label and all
    other columns as the features.

    Args:
        csv_path: location of the CSV file. Defaults to the path this
            notebook has always used, so ``WineDataset()`` keeps working.

    Attributes:
        x: float32 tensor of shape (n_samples, n_features).
        y: float32 tensor of shape (n_samples, 1).
        n_samples: number of data rows that were loaded.
    """

    def __init__(self, csv_path='./Data/wine.csv'):
        # data loading; ndmin=2 keeps the array 2-D even when the file holds
        # a single data row, so the column slicing below still works
        xy = np.loadtxt(csv_path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.x = torch.from_numpy(xy[:, 1:])
        # index with a list ([0]) rather than 0 to keep the column axis
        self.y = torch.from_numpy(xy[:, [0]])  # (n_samples, 1)
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``; enables ``dataset[0]``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples; enables ``len(dataset)``."""
        return self.n_samples

In [30]:
# Instantiate the wine dataset and inspect its first sample.
dataset = WineDataset()

# Indexing goes through __getitem__ and yields a (features, label) pair.
features, labels = first_data = dataset[0]
print(features, labels)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


In [31]:
# Wrap the dataset in a DataLoader that serves shuffled mini-batches of 4 samples.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=4,
    shuffle=True,
)

In [32]:
# Turn the loader into an iterator and pull one mini-batch from it.
dataiter = iter(dataloader)
features, labels = data = next(dataiter)
print(features, labels)

tensor([[1.3230e+01, 3.3000e+00, 2.2800e+00, 1.8500e+01, 9.8000e+01, 1.8000e+00,
         8.3000e-01, 6.1000e-01, 1.8700e+00, 1.0520e+01, 5.6000e-01, 1.5100e+00,
         6.7500e+02],
        [1.2080e+01, 1.8300e+00, 2.3200e+00, 1.8500e+01, 8.1000e+01, 1.6000e+00,
         1.5000e+00, 5.2000e-01, 1.6400e+00, 2.4000e+00, 1.0800e+00, 2.2700e+00,
         4.8000e+02],
        [1.1450e+01, 2.4000e+00, 2.4200e+00, 2.0000e+01, 9.6000e+01, 2.9000e+00,
         2.7900e+00, 3.2000e-01, 1.8300e+00, 3.2500e+00, 8.0000e-01, 3.3900e+00,
         6.2500e+02],
        [1.3560e+01, 1.7100e+00, 2.3100e+00, 1.6200e+01, 1.1700e+02, 3.1500e+00,
         3.2900e+00, 3.4000e-01, 2.3400e+00, 6.1300e+00, 9.5000e-01, 3.3800e+00,
         7.9500e+02]]) tensor([[3.],
        [2.],
        [2.],
        [1.]])


In [33]:
# training loop setup
num_epochs = 2
total_samples = len(dataset)
# Derive the steps per epoch from the loader's own batch size rather than
# repeating the literal 4, so the two cannot drift apart if batch_size changes.
# Rounding up accounts for the final, smaller batch.
n_iterations = math.ceil(total_samples / dataloader.batch_size)
print(total_samples, n_iterations)

178 45


In [35]:
# Walk over the whole dataset num_epochs times, one mini-batch per step.
# 2 epochs x 45 steps: batches are [4, 13], except the last batch of each
# epoch, which is [2, 13] because 178 samples do not divide evenly by 4.
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(dataloader):
        # forward pass

        # backward pass

        # update

        # log only every 5th step; i is 0-based, so steps 5, 10, ... are i = 4, 9, ...
        if i % 5 != 4:
            continue
        print('Epoch {}, step {}/{}, inputs: {}'.format(epoch+1, i+1, n_iterations, inputs.shape))

Epoch 1, step 5/45, inputs: torch.Size([4, 13])
Epoch 1, step 10/45, inputs: torch.Size([4, 13])
Epoch 1, step 15/45, inputs: torch.Size([4, 13])
Epoch 1, step 20/45, inputs: torch.Size([4, 13])
Epoch 1, step 25/45, inputs: torch.Size([4, 13])
Epoch 1, step 30/45, inputs: torch.Size([4, 13])
Epoch 1, step 35/45, inputs: torch.Size([4, 13])
Epoch 1, step 40/45, inputs: torch.Size([4, 13])
Epoch 1, step 45/45, inputs: torch.Size([2, 13])
Epoch 2, step 5/45, inputs: torch.Size([4, 13])
Epoch 2, step 10/45, inputs: torch.Size([4, 13])
Epoch 2, step 15/45, inputs: torch.Size([4, 13])
Epoch 2, step 20/45, inputs: torch.Size([4, 13])
Epoch 2, step 25/45, inputs: torch.Size([4, 13])
Epoch 2, step 30/45, inputs: torch.Size([4, 13])
Epoch 2, step 35/45, inputs: torch.Size([4, 13])
Epoch 2, step 40/45, inputs: torch.Size([4, 13])
Epoch 2, step 45/45, inputs: torch.Size([2, 13])


In [37]:
# torchvision.datasets ships several well-known datasets ready to use,
# e.g. MNIST, Fashion-MNIST, CIFAR10, COCO

# MNIST training split rooted at ./Data, with each sample passed through ToTensor()
train_dataset = torchvision.datasets.MNIST(
    root='./Data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# shuffled mini-batches of 3 samples
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=3,
    shuffle=True,
)