# 1. 3D Tensor dataset

In this section, we create a custom dataset class named `Random3DTensorDataset` that extends PyTorch's `Dataset`. The dataset generates random 3D tensors and corresponding random binary labels on the fly: nothing is stored, so every access to an index draws a new sample.

In [21]:
import torch
from torch.utils.data import Dataset, DataLoader

class Random3DTensorDataset(Dataset):
    """Synthetic dataset of random tensors paired with random binary labels.

    Samples are not stored: every ``__getitem__`` call draws a fresh tensor
    and a fresh label, so reading the same index twice gives different values.

    Args:
        num_samples: Number of samples reported by ``len()``.
        tensor_shape: Shape of each generated tensor.

    Raises:
        ValueError: If ``num_samples`` is negative.
    """

    def __init__(self, num_samples: int, tensor_shape: tuple = (3, 32, 32)):
        if num_samples < 0:
            raise ValueError(f"num_samples must be non-negative, got {num_samples}")
        self.num_samples = num_samples
        self.tensor_shape = tensor_shape

    def __len__(self) -> int:
        """Return the number of samples the dataset exposes."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return a freshly generated ``(tensor, label)`` pair for ``idx``.

        Args:
            idx: Sample position; negative values count from the end.

        Returns:
            A tuple of a ``torch.rand`` tensor with shape ``tensor_shape`` and
            a 0-dim integer tensor holding 0 or 1.

        Raises:
            IndexError: If ``idx`` lies outside ``[-len(self), len(self))``.
        """
        # Dataset defines no __iter__, so `for x in dataset` / `list(dataset)`
        # use the legacy __getitem__ protocol and only stop on IndexError.
        # Without this check such loops would never terminate.
        if not -self.num_samples <= idx < self.num_samples:
            raise IndexError(
                f"index {idx} is out of range for dataset of size {self.num_samples}"
            )
        tensor = torch.rand(*self.tensor_shape)
        label = torch.randint(0, 2, ())
        return tensor, label

# Build a 100-sample dataset and wrap it in a shuffling DataLoader
dataset = Random3DTensorDataset(num_samples=100, tensor_shape=(3, 32, 32))
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)

# Pull exactly one batch from the loader and inspect it
batch_tensors, batch_labels = next(iter(dataloader))
print("Tensor batch shape:", batch_tensors.shape)
print("Labels:", batch_labels)

Tensor batch shape: torch.Size([10, 3, 32, 32])
Labels: tensor([0, 0, 0, 1, 1, 1, 0, 1, 0, 0])


# 2. Datastreamers

In [22]:
import torch
import random

class BaseDatastreamer:
    """Reshuffling batch generator over an indexable dataset.

    Args:
        dataset: Object supporting ``len()`` and integer indexing, where each
            item is a ``(tensor, label)`` pair.
        batchsize: Number of items per batch; the last batch of an epoch may
            be smaller.
        preprocessor: Optional callable applied to the stacked tensor batch
            (labels are passed through untouched).
        max_batches: Total number of batches ``stream()`` yields before it
            stops. ``None`` streams indefinitely; ``0`` yields nothing.

    Raises:
        ValueError: If ``batchsize`` is smaller than 1.
    """

    def __init__(self, dataset, batchsize, preprocessor=None, max_batches=None):
        if batchsize < 1:
            # A zero step makes range() fail later and a negative step yields
            # no batches at all, which would leave stream() spinning forever.
            raise ValueError(f"batchsize must be >= 1, got {batchsize}")
        self.dataset = dataset
        self.batchsize = batchsize
        self.preprocessor = preprocessor
        self.max_batches = max_batches

    def stream(self):
        """Yield ``(batch_tensors, batch_labels)``, reshuffling every epoch.

        Yields:
            A tuple of the stacked (and optionally preprocessed) tensors and a
            tensor with the labels of the same items.
        """
        batch_count = 0
        while True:
            indices = list(range(len(self.dataset)))
            if not indices:
                # An empty dataset can never produce a batch; stop instead of
                # looping forever without yielding.
                return
            random.shuffle(indices)
            for i in range(0, len(indices), self.batchsize):
                # Enforce the limit per batch, not per epoch, so the stream
                # stops exactly at max_batches even in the middle of an epoch.
                if self.max_batches is not None and batch_count >= self.max_batches:
                    return

                batch_indices = indices[i: i + self.batchsize]
                batch = [self.dataset[idx] for idx in batch_indices]

                batch_tensors, batch_labels = zip(*batch)

                batch_tensors = torch.stack(batch_tensors)
                if isinstance(batch_labels[0], torch.Tensor):
                    # Stacking also handles labels that are not single scalars.
                    batch_labels = torch.stack(batch_labels)
                else:
                    batch_labels = torch.tensor(batch_labels)

                if self.preprocessor:
                    batch_tensors = self.preprocessor(batch_tensors)

                yield batch_tensors, batch_labels
                batch_count += 1


def batch_processor(batch_tensors):
    """Example preprocessor: return the batch multiplied by two."""
    doubled = batch_tensors * 2
    return doubled

# Dataset of 100 random tensors with shape (3, 32, 32)
dataset = Random3DTensorDataset(num_samples=100, tensor_shape=(3, 32, 32))

# Streamer that runs batch_processor on every batch, configured with max_batches=5
streamer = BaseDatastreamer(
    dataset=dataset,
    batchsize=10,
    preprocessor=batch_processor,
    max_batches=5,
)

data_gen = streamer.stream()

# Draw five batches from the generator and show their shape and labels
for _, (batch_tensors, batch_labels) in zip(range(5), data_gen):
    print(batch_tensors.shape)
    print(batch_labels)
    print("\n")



torch.Size([10, 3, 32, 32])
tensor([0, 1, 0, 0, 1, 0, 0, 1, 0, 1])


torch.Size([10, 3, 32, 32])
tensor([0, 1, 0, 0, 1, 1, 1, 1, 0, 1])


torch.Size([10, 3, 32, 32])
tensor([1, 0, 0, 0, 1, 1, 0, 1, 1, 1])


torch.Size([10, 3, 32, 32])
tensor([1, 0, 0, 0, 1, 0, 1, 1, 1, 0])


torch.Size([10, 3, 32, 32])
tensor([1, 0, 0, 0, 0, 1, 0, 0, 1, 1])




# 3. Tune the network

In [33]:
from mads_datasets import DatasetFactoryProvider, DatasetType

from mltrainer.preprocessors import BasePreprocessor
from mltrainer import imagemodels, Trainer, TrainerSettings, ReportTypes, metrics

import torch.optim as optim
import gin

In [34]:
# Load the gin configuration file "model.gin"; the cell output lists the
# imports and includes that the file declares.
gin.parse_config_file("model.gin")

ParsedConfigFileIncludesAndImports(filename='model.gin', imports=['gin.torch.external_configurables'], includes=[])

In [35]:
# Build the FASHION datastreamers (batch size 64) through the dataset factory
preprocessor = BasePreprocessor()
fashionfactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
streamers = fashionfactory.create_datastreamer(batchsize=64, preprocessor=preprocessor)

# Keep the streamer objects themselves (len() is taken from them later) and
# open one generator per split
train, valid = streamers["train"], streamers["valid"]
trainstreamer, validstreamer = train.stream(), valid.stream()

[32m2025-02-17 08:58:39.813[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /Users/rimansingh/.cache/mads_datasets/fashionmnist[0m
[32m2025-02-17 08:58:39.823[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at /Users/rimansingh/.cache/mads_datasets/fashionmnist/fashionmnist.pt[0m


In [36]:
# Print gin's current configuration as text to check which parameter values
# are bound after parsing model.gin.
print(gin.config_str())

import gin.torch.external_configurables

# Parameters for NeuralNetwork:
NeuralNetwork.num_classes = 10
NeuralNetwork.units1 = 512
NeuralNetwork.units2 = 16



In [27]:
# Accuracy metric instance; passed to TrainerSettings(metrics=[accuracy]) in
# the tuning cell.
accuracy = metrics.Accuracy()

In [28]:
import torch
# Re-read model.gin before the per-run overrides below are applied
gin.parse_config_file("model.gin")

# Candidate values tried for both NeuralNetwork.units1 and NeuralNetwork.units2
units = [256, 128, 64]
loss_fn = torch.nn.CrossEntropyLoss()

# A single settings object is shared by every run of the grid
settings = TrainerSettings(
    epochs=5,
    metrics=[accuracy],
    logdir="models",
    train_steps=len(train),
    valid_steps=len(valid),
    reporttypes=[ReportTypes.TENSORBOARD, ReportTypes.GIN],
)

# Every (units1, units2) combination, with units1 varying slowest
grid = [(first, second) for first in units for second in units]

for unit1, unit2 in grid:
    # Override the gin bindings so the next NeuralNetwork() picks them up
    gin.bind_parameter("NeuralNetwork.units1", unit1)
    gin.bind_parameter("NeuralNetwork.units2", unit2)

    model = imagemodels.NeuralNetwork()
    trainer = Trainer(
        model=model,
        settings=settings,
        loss_fn=loss_fn,
        optimizer=optim.Adam,
        scheduler=optim.lr_scheduler.ReduceLROnPlateau,
        traindataloader=trainstreamer,
        validdataloader=validstreamer,
    )
    trainer.loop()


[32m2025-02-17 08:36:36.106[0m | [1mINFO    [0m | [36mmltrainer.settings[0m:[36mcheck_path[0m:[36m61[0m - [1mCreated logdir /Users/rimansingh/University/sem-3/Adv-AI/IECS-Advanced-AI-Rimandeep/1_pytorch/models[0m
[32m2025-02-17 08:36:36.163[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to models/20250217-083636[0m
[32m2025-02-17 08:36:36.166[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m72[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/5 [00:00<?, ?it/s]

100%|[38;2;30;71;6m██████████[0m| 937/937 [00:11<00:00, 79.29it/s]
[32m2025-02-17 08:36:48.773[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 0 train 0.5111 test 0.4178 metric ['0.8433'][0m
100%|[38;2;30;71;6m██████████[0m| 937/937 [00:15<00:00, 60.35it/s]
[32m2025-02-17 08:37:05.261[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 1 train 0.3682 test 0.3834 metric ['0.8585'][0m
100%|[38;2;30;71;6m██████████[0m| 937/937 [00:13<00:00, 72.00it/s]
[32m2025-02-17 08:37:19.507[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 2 train 0.3275 test 0.3545 metric ['0.8733'][0m
100%|[38;2;30;71;6m██████████[0m| 937/937 [00:18<00:00, 50.37it/s]
[32m2025-02-17 08:37:39.251[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mreport[0m:[36m191[0m - [1mEpoch 3 train 0.3036 test 0.3488 metric ['0.8743'][0m
100%|[38;2;30;71;6m██████████[0m| 937/

# Updated Experiment Code