In [1]:
from abc import ABC, abstractmethod
from __future__ import annotations
import numpy as np
import tensor

In [2]:
# dataset class

class Dataset(ABC):
    """Abstract interface for an indexable, sized collection of samples.

    Subclasses must implement both ``__getitem__`` and ``__len__``; the class
    cannot be instantiated until they do.
    """

    @abstractmethod
    def __len__(self):
        """Return the number of samples in the dataset."""

    @abstractmethod
    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""

In [3]:
# Subset class

class Subset(Dataset):
    """View onto a dataset restricted to a chosen list of positions.

    Args:
        dataset: The underlying dataset to index into.
        indices: Positions in ``dataset`` that make up the subset, in the
            order they are served. When omitted (``None``) the subset covers
            every position of ``dataset``.
    """
    def __init__(self, dataset: Dataset, indices: list = None):
        super().__init__()
        # Without an index list, len() and indexing would fail with a
        # TypeError on None; default to the full range of the dataset instead.
        if indices is None:
            indices = list(range(len(dataset)))
        self.indices = indices
        self.dataset = dataset

    def __getitem__(self, idx):
        """Return the sample of the wrapped dataset at ``indices[idx]``."""
        return self.dataset[self.indices[idx]]

    def __len__(self):
        """Return the number of selected positions."""
        return len(self.indices)

In [4]:
# dataloader class

class Dataloader:
    '''Iterate over a dataset in mini-batches.

    Args:
        dataset: Indexable, sized collection whose items are ``(X, y)`` pairs.
        shuffle: When true, the sample order is reshuffled at the start of
            every pass over the loader.
        drop_last: When true, a trailing batch holding fewer than
            ``batch_size`` samples is skipped; when false it is yielded.
        batch_size: Number of samples per batch; must be at least 1.
        seed: Seed for the NumPy generator that drives shuffling.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    '''
    def __init__(self, dataset: Dataset, shuffle: bool, drop_last: bool = True, batch_size: int = 32, seed=42):
        # Fail at construction time rather than with a ZeroDivisionError in
        # __len__ or a ValueError from range() in __iter__.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.rng = np.random.default_rng(seed)

    def __iter__(self):
        """Yield ``(tensor.tensor(Xs), tensor.tensor(ys))`` for each batch."""
        N = len(self.dataset)
        indices = list(range(N))

        if self.shuffle:
            self.rng.shuffle(indices)

        for start in range(0, N, self.batch_size):
            batch_idx = indices[start: start + self.batch_size]

            # Only the final slice can be short; honor drop_last so the number
            # of yielded batches always matches __len__.
            if self.drop_last and len(batch_idx) < self.batch_size:
                break

            batch_samples = [self.dataset[x] for x in batch_idx]
            # Transpose [(X0, y0), (X1, y1), ...] into (X0, X1, ...), (y0, y1, ...)
            Xs, ys = zip(*batch_samples)

            yield tensor.tensor(Xs), tensor.tensor(ys)

    def __len__(self):
        """Return the number of batches one pass of ``__iter__`` yields."""
        full_batches, remainder = divmod(len(self.dataset), self.batch_size)
        if remainder and not self.drop_last:
            return full_batches + 1
        return full_batches

In [29]:
# trainer class

class trainer:
    '''Manages the epoch loop and the batch loop (per epoch) to train a model
    (using a dataloader to get mini batches).

    Args:
        model: Callable mapping a batch of inputs to predictions; must also
            expose ``zero_grad()``.
        loss_fn: Callable ``loss_fn(yhat, y)`` whose result exposes
            ``backward()``.
        train_loader: Iterable yielding ``(X, y)`` batches.
        optimizer: Object exposing ``step()``.
        val_loader: Optional loader kept on the instance; ``fit`` does not
            read it.
        callbacks: Optional list of callback objects. They are stored on
            ``self.callbacks`` (``None`` entries removed) but are not invoked
            by this class yet.
        epochs: Number of passes over ``train_loader`` performed by ``fit``.
    '''
    def __init__(self, model, loss_fn, train_loader, optimizer, val_loader=None, callbacks: list = None, epochs=32):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.train_loader = train_loader
        self.epochs = epochs
        # Always define the attribute (None when absent) so later code can
        # test it without an AttributeError; also avoids truth-testing the
        # loader, which would treat a zero-length loader as "not given".
        self.val_loader = val_loader
        # None default instead of a shared mutable list; drop None placeholders.
        self.callbacks = [cb for cb in (callbacks or []) if cb is not None]

        # internal -- to be updated and used by callbacks
        self.epoch = 0          # index of the epoch currently running
        self.global_step = 0    # batches started since construction
        self.batch_idx = 0      # index of the current batch within the epoch
        self._loss = float(0)   # latest per-batch loss, as returned by loss_fn
        self.metrics = dict()

    # internal callbacks
    def on_batch_begin(self):
        """Hook run before each batch; counts the batch in ``global_step``."""
        self.global_step += 1

    def on_batch_end(self):
        """Hook run after each optimizer step; no-op by default."""

    def on_epoch_begin(self):
        """Hook run before the first batch of each epoch; no-op by default."""

    def on_epoch_end(self):
        """Hook run after the last batch of each epoch; no-op by default."""

    def fit(self):
        """Train ``self.model`` for ``self.epochs`` passes over ``self.train_loader``."""
        for epoch in range(self.epochs):
            self.epoch = epoch
            self.on_epoch_begin()
            for i, (X, y) in enumerate(self.train_loader):
                self.batch_idx = i
                self.on_batch_begin()

                # forward pass
                yhat = self.model(X)

                # loss
                loss = self.loss_fn(yhat, y)

                # clear gradients left over from the previous step
                self.model.zero_grad()

                # backprop
                loss.backward()

                # step -- update
                self.optimizer.step()

                # expose the latest loss to the hooks
                self._loss = loss
                self.on_batch_end()
            self.on_epoch_end()
            

In [6]:
# testing dataset
class myData(Dataset):
    """Toy in-memory dataset pairing entry ``idx`` of ``X`` with entry ``idx`` of ``y``.

    Args:
        X: Indexable, sized container of inputs.
        y: Indexable container of targets, indexed in step with ``X``.
    """
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __getitem__(self, idx):
        """Return the ``(X[idx], y[idx])`` pair."""
        return (self.X[idx], self.y[idx])

    def __len__(self):
        """Return the number of samples held by this instance."""
        # Measure the instance's own data, not a same-named notebook global.
        return len(self.X)

In [7]:
# Seeded generator so the toy data is identical on every Restart & Run All.
X = np.random.default_rng(42).standard_normal((100, 2))  # 100 samples, 2 features
y = np.zeros(100)  # dummy all-zero targets

data = myData(X, y)
assert len(data) == 100  # __len__: a bare mid-cell expression would be discarded
data[10]  # __getitem__ -> (features, target) pair

(array([-1.81362299, -1.18700452]), np.float64(0.0))

In [8]:
# Subset check: expose only the first six samples of `data`
sub = Subset(dataset=data, indices=[0, 1, 2, 3, 4, 5])
(sub[0], len(sub))

((array([-0.84213977,  2.07469566]), np.float64(0.0)), 6)

In [9]:
# Dataloader check: shuffled loader over `data` serving mini-batches of 10
dataloader = Dataloader(
    dataset=data,
    shuffle=True,
    batch_size=10,
)

In [11]:
# Dataloader check: walk the loader for several epochs and print the first
# sample of every batch it serves
epochs = 5
for e in range(epochs):
    print(f'e={e}')
    for i, (xs, ys) in enumerate(dataloader):
        print(f'  batch = {i}')
        print('   first sample:', xs[0], ys[0])


e=0
  batch = 0
   first sample: tensor(data=[0.9377148  0.23853226], grad=[0. 0.] shape=(2,)) tensor(data=0.0, grad=0.0 shape=())
  batch = 1
   first sample: tensor(data=[ 0.64229688 -0.94382451], grad=[0. 0.] shape=(2,)) tensor(data=0.0, grad=0.0 shape=())
  batch = 2
   first sample: tensor(data=[0.85365835 0.67456539], grad=[0. 0.] shape=(2,)) tensor(data=0.0, grad=0.0 shape=())
  batch = 3
   first sample: tensor(data=[ 0.43967414 -0.92555435], grad=[0. 0.] shape=(2,)) tensor(data=0.0, grad=0.0 shape=())
  batch = 4
   first sample: tensor(data=[ 0.02231461 -0.10005181], grad=[0. 0.] shape=(2,)) tensor(data=0.0, grad=0.0 shape=())
  batch = 5
   first sample: tensor(data=[-0.12066239  0.30567677], grad=[0. 0.] shape=(2,)) tensor(data=0.0, grad=0.0 shape=())
  batch = 6
   first sample: tensor(data=[0.13216014 1.83194883], grad=[0. 0.] shape=(2,)) tensor(data=0.0, grad=0.0 shape=())
  batch = 7
   first sample: tensor(data=[0.41562636 0.10621667], grad=[0. 0.] shape=(2,)) tensor(d