<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Objective" data-toc-modified-id="Objective-1">Objective</a></span></li><li><span><a href="#Datasets-&amp;-DataLoaders" data-toc-modified-id="Datasets-&amp;-DataLoaders-2">Datasets &amp; DataLoaders</a></span></li><li><span><a href="#Testing-It-Out" data-toc-modified-id="Testing-It-Out-3">Testing It Out</a></span></li></ul></div>

In [53]:
import numpy as np
from sklearn.datasets import load_iris

---------

# Objective

The goal of this notebook is to build our own custom `Dataset` and `DataLoader` classes that mirror the API of their PyTorch counterparts.

------------

# Datasets & DataLoaders

In [54]:
class Dataset:
    """Pair a collection of inputs with a collection of targets.

    Indexing the dataset indexes both collections with the same key, so
    whatever index types ``X`` and ``y`` accept are accepted here too.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from the targets collection."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(inputs, targets)`` pair selected by ``idx``."""
        inputs = self.X[idx]
        targets = self.y[idx]
        return inputs, targets

    def __setitem__(self, idx, val):
        """Overwrite the entries at ``idx`` from a ``(inputs, targets)`` pair."""
        new_inputs, new_targets = val
        self.X[idx] = new_inputs
        self.y[idx] = new_targets

In [66]:
class DataLoader:
    """Iterate over a dataset in mini-batches of inputs and targets.

    Parameters
    ----------
    ds
        Object supporting ``len(ds)`` and ``ds[idx]``. It is indexed with
        slices when ``shuffle`` is false and with NumPy integer arrays when
        ``shuffle`` is true.
    batch_size : int
        Number of samples per mini-batch; must be at least 1.
    shuffle : bool, default False
        Draw a fresh random sample order every time the loader is iterated.
    drop_last : bool, default False
        If true, skip the final batch when it holds fewer than
        ``batch_size`` samples. If false, that smaller batch is yielded too.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """

    def __init__(self, ds, batch_size, shuffle=False, drop_last=False):
        # Fail at construction rather than with a ZeroDivisionError mid-loop.
        if batch_size < 1:
            raise ValueError(
                f"batch_size must be a positive integer, got {batch_size!r}"
            )
        self.ds = ds
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last

    def shuffle_data(self):
        """Rebind ``self.ds`` to a new ``Dataset`` holding a random permutation.

        Kept for callers that invoke it directly; ``__iter__`` does not call
        it and shuffles through indices instead, leaving ``self.ds`` alone.
        """
        idxs = np.random.permutation(len(self.ds))
        self.ds = Dataset(*self.ds[idxs])

    def __len__(self):
        """Number of mini-batches produced by one pass over the dataset."""
        n_full, remainder = divmod(len(self.ds), self.batch_size)
        if remainder and not self.drop_last:
            return n_full + 1
        return n_full

    def __iter__(self):
        """Yield ``(inputs, targets)`` mini-batches covering the dataset."""
        n_samples = len(self.ds)
        # Shuffle by permuting indices so the wrapped dataset object (and its
        # type) is never replaced or copied wholesale.
        order = np.random.permutation(n_samples) if self.shuffle else None
        for batch_idx in range(len(self)):
            start = batch_idx * self.batch_size
            # Clamp so the trailing partial batch stops at the dataset end.
            stop = min(start + self.batch_size, n_samples)
            if order is None:
                yield self.ds[start:stop]
            else:
                yield self.ds[order[start:stop]]

----------

# Testing It Out

In [67]:
# X, y = load_iris(return_X_y=True)
# X.shape, y.shape

In [68]:
# Toy data: three samples with two features each, and one label per sample.
X = np.asarray(
    [
        [1, 2],
        [3, 4],
        [5, 6],
    ]
)
y = np.asarray([0, 1, 0])

In [69]:
# Wrap the toy arrays in a dataset
ds = Dataset(X=X, y=y)

# Serve the dataset one sample per batch, in shuffled order
dl = DataLoader(ds=ds, batch_size=1, shuffle=True)

In [82]:
# Batch-specific loop names keep the notebook-level `y` (the targets array)
# from being overwritten by the last mini-batch, so earlier cells that build
# the dataset from `X` and `y` can be re-run safely.
for xb, yb in dl:
    print(xb, yb)

[[3 4]] [1]
[[1 2]] [0]
[[5 6]] [0]


In [84]:
# Inspect the original dataset's inputs after the shuffled pass above to check
# that they are still in their initial order.
ds.X

array([[1, 2],
       [3, 4],
       [5, 6]])