<a href="https://colab.research.google.com/github/kameshcodes/deep-learning-codes/blob/main/Dataloader_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np

# Read the raw wine CSV into a NumPy array: the header row is skipped and
# the label column is kept alongside the features.
WINE_CSV_URL = "https://gist.githubusercontent.com/tijptjik/9408623/raw/b237fa5848349a14a14e5d4107dc7897c21951f5/wine.csv"
data = np.loadtxt(WINE_CSV_URL, delimiter=",", skiprows=1)
data

array([[1.000e+00, 1.423e+01, 1.710e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.000e+00, 1.320e+01, 1.780e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.000e+00, 1.316e+01, 2.360e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [3.000e+00, 1.327e+01, 4.280e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [3.000e+00, 1.317e+01, 2.590e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [3.000e+00, 1.413e+01, 4.100e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]])

- Using the entire dataset for gradient calculation and model optimization in every epoch is highly time-consuming.

- If the dataset is large, it is more efficient to split the samples into smaller so-called batches and optimize the model on one batch at a time rather than on the entire dataset at once.


$\text{Let's define some terms:}$

- **epochs:**  1 forward and backward pass of ALL training samples.
- **batch_size:** number of training samples in one forward and backward pass
- **number of iterations:** number of passes needed to complete one epoch, each pass using [batch_size] samples

<br>
$\textbf{100 samples, batch_size=20}\rightarrow\text{100/20 = 5 iterations for 1 epoch}$



In [7]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [38]:
class WineDataset(Dataset):
  """Wine data set served as ``(features, label)`` pairs.

  The CSV is expected to have one header row, the class label in the first
  column and the numeric features in the remaining columns.

  Attributes:
    x: float32 tensor of shape (n_samples, n_features) holding the features.
    y: float32 tensor of shape (n_samples,) holding the class labels.
    n_samples: number of data rows loaded from the CSV.
  """

  # Source used when no path is given (the wine CSV this notebook works with).
  DEFAULT_URL = "https://gist.githubusercontent.com/tijptjik/9408623/raw/b237fa5848349a14a14e5d4107dc7897c21951f5/wine.csv"

  def __init__(self, path=DEFAULT_URL):
    """Load the whole CSV into memory and split it into features and labels.

    Args:
      path: Local file path or URL of the CSV. Defaults to ``DEFAULT_URL``.
    """
    # ndmin=2 keeps a one-row file two-dimensional so the column slicing
    # below does not fail on a 1-D array.
    xy = np.loadtxt(path, dtype=np.float32, delimiter=",", skiprows=1, ndmin=2)
    self.x = torch.from_numpy(xy[:, 1:])  # every column except the first
    self.y = torch.from_numpy(xy[:, 0])   # first column is the class label
    self.n_samples = xy.shape[0]

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.x[index], self.y[index]

  def __len__(self):
    """Return the number of samples in the data set."""
    return self.n_samples


In [39]:
# Build the dataset once; the constructor reads the whole wine CSV into memory.
dataset = WineDataset()

In [28]:
# Indexing the dataset goes through WineDataset.__getitem__ and returns a
# (features, label) pair for that row.
first_sample = dataset[0]
features = first_sample[0]
labels = first_sample[1]
print(features, labels)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor(1.)


In [59]:
# Wrap the dataset in a loader that yields shuffled mini-batches of 4 samples,
# loaded with 2 workers.
dataloader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

In [60]:
# Pull a single mini-batch out of the loader to inspect what it yields.
dataiter = iter(dataloader)
data = next(dataiter)  # built-in next() rather than calling __next__() directly
features, labels = data
print(features, labels)

tensor([[1.3480e+01, 1.8100e+00, 2.4100e+00, 2.0500e+01, 1.0000e+02, 2.7000e+00,
         2.9800e+00, 2.6000e-01, 1.8600e+00, 5.1000e+00, 1.0400e+00, 3.4700e+00,
         9.2000e+02],
        [1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03],
        [1.3240e+01, 3.9800e+00, 2.2900e+00, 1.7500e+01, 1.0300e+02, 2.6400e+00,
         2.6300e+00, 3.2000e-01, 1.6600e+00, 4.3600e+00, 8.2000e-01, 3.0000e+00,
         6.8000e+02],
        [1.2430e+01, 1.5300e+00, 2.2900e+00, 2.1500e+01, 8.6000e+01, 2.7400e+00,
         3.1500e+00, 3.9000e-01, 1.7700e+00, 3.9400e+00, 6.9000e-01, 2.8400e+00,
         3.5200e+02]]) tensor([1., 1., 1., 2.])


In [62]:
# Bookkeeping for the dummy training loop.
batch_size = 4  # keep in sync with the batch_size passed to DataLoader
num_epochs = 2
total_samples = len(dataset)
# A final, smaller batch still counts as one iteration, hence the ceiling.
n_iterations = math.ceil(total_samples / batch_size)
print(f"{total_samples} {n_iterations}")

178 45


In [66]:
# Dummy training loop: walk over every batch in every epoch and report the
# batch shape on every 5th step.
log_every = 5
for epoch in range(num_epochs):
  for i, (inputs, labels) in enumerate(dataloader):
    if (i + 1) % log_every != 0:
      continue  # only report on every 5th step
    print(f"epoch {epoch+1}/{num_epochs}, step={i+1}/{n_iterations}, inputs = {inputs.shape}")

epoch 1/2, step=5/45, inputs = torch.Size([4, 13])
epoch 1/2, step=10/45, inputs = torch.Size([4, 13])
epoch 1/2, step=15/45, inputs = torch.Size([4, 13])
epoch 1/2, step=20/45, inputs = torch.Size([4, 13])
epoch 1/2, step=25/45, inputs = torch.Size([4, 13])
epoch 1/2, step=30/45, inputs = torch.Size([4, 13])
epoch 1/2, step=35/45, inputs = torch.Size([4, 13])
epoch 1/2, step=40/45, inputs = torch.Size([4, 13])
epoch 1/2, step=45/45, inputs = torch.Size([2, 13])
epoch 2/2, step=5/45, inputs = torch.Size([4, 13])
epoch 2/2, step=10/45, inputs = torch.Size([4, 13])
epoch 2/2, step=15/45, inputs = torch.Size([4, 13])
epoch 2/2, step=20/45, inputs = torch.Size([4, 13])
epoch 2/2, step=25/45, inputs = torch.Size([4, 13])
epoch 2/2, step=30/45, inputs = torch.Size([4, 13])
epoch 2/2, step=35/45, inputs = torch.Size([4, 13])
epoch 2/2, step=40/45, inputs = torch.Size([4, 13])
epoch 2/2, step=45/45, inputs = torch.Size([2, 13])
