<h1>Question 1</h1>

1. Get the Tiny ImageNet dataset.<br>
2. Create your own dataset from the Tiny ImageNet dataset as follows:<br>
o Consider only 100 classes from the dataset<br>
o From each class, take 500 images<br>
o Split your dataset into training, testing, and validation sets such that the training
set contains 30,000 images, the testing set contains 10,000 images, and the validation set contains
10,000 images<br>
o Prepare your data for your model as described in section 2 of AlexNet paper.<br>
o Important: Your code must have appropriate comments.

In [15]:
#Import the required modules
import numpy as np
import deeplake
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch
from PIL import Image
from tqdm import tqdm
import random



In [16]:
# Open the Tiny ImageNet training split hosted on Activeloop via deeplake
DATASET_URI = "hub://activeloop/tiny-imagenet-train"
ds = deeplake.load(DATASET_URI)

\

Opening dataset in read-only mode as you don't have write permissions.


|

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/tiny-imagenet-train



|

hub://activeloop/tiny-imagenet-train loaded successfully.



 

In [18]:
# Show which tensors (columns) the dataset exposes
tensor_names = list(ds.tensors.keys())
print("Available tensors:", tensor_names)

# Report how many images the full dataset holds
total_samples = len(ds)
print("No of samples in the dataset :", total_samples)

Available tensors: ['bbox', 'image', 'label']
No of samples in the dataset : 100000


In [21]:
# Pull every label into a flat 1-D array and count the distinct class ids
labels = ds.label.numpy().flatten()
unique_labels = np.unique(labels)
count_classes = len(unique_labels)

print(f"Tiny ImageNet has {count_classes} classes")

Tiny ImageNet has 200 classes


In [26]:
# Randomly pick 100 of the classes present in the Tiny ImageNet labels.
# Seed NumPy's global RNG first so the chosen subset is reproducible; the later
# cells that sample images and shuffle indices use the same global RNG, so a
# top-to-bottom run yields the same dataset every time.
SEED = 42
NUM_CLASSES = 100
np.random.seed(SEED)

# Draw from the label values that actually occur instead of range(count_classes),
# so the selection stays valid even if the label ids are not contiguous 0..N-1.
selected_classes = np.random.choice(np.unique(labels), size=NUM_CLASSES, replace=False)
print(f"No of classes selected : {len(selected_classes)}")

No of classes selected : 100


In [28]:
# Build lookup tables between original class ids and contiguous ids 0..99
old_to_new_label = {}
for new_label, original_label in enumerate(selected_classes):
    old_to_new_label[original_label] = new_label

# Inverse table: contiguous id -> original class id
new_to_old_label = dict(zip(old_to_new_label.values(), old_to_new_label.keys()))

In [30]:
# For every selected class, draw 500 distinct image indices at random
# (flatnonzero gives the positions whose label equals the class id),
# and gather all of them into one NumPy array.
selected_indices = np.array([
    image_idx
    for cls in selected_classes
    for image_idx in np.random.choice(np.flatnonzero(labels == cls), size=500, replace=False)
])

# Shuffle in place so the classes are mixed before the split
np.random.shuffle(selected_indices)

In [32]:
# Sanity check: 100 classes x 500 images must give 50,000 indices
num_selected = len(selected_indices)
assert num_selected == 50000
print(f"Total samples in dataset : {num_selected}")

Total samples in dataset : 50000


In [35]:
# Boundaries of the shuffled index array: 30k train / 10k validation / 10k test
TRAIN_END, VAL_END, TEST_END = 30000, 40000, 50000
train_index = selected_indices[:TRAIN_END]
val_index = selected_indices[TRAIN_END:VAL_END]
test_index = selected_indices[VAL_END:TEST_END]

# Materialise one deeplake view per split from its list of indices
train_ds, val_ds, test_ds = (
    ds[split_index.tolist()] for split_index in (train_index, val_index, test_index)
)

# Confirm the size of every split
for split_name, split_view in (("Training", train_ds), ("Validation", val_ds), ("Test", test_ds)):
    print(f"{split_name} set size: {len(split_view)}")

Training set size: 30000
Validation set size: 10000
Test set size: 10000


In [39]:
#Implement Alexnet Preprocessing( computing mean RGB values of Training dataset)
def _as_rgb_uint8(img):
    """Return ``img`` as an (H, W, 3) uint8 array.

    Accepts (H, W), (H, W, 1) and (H, W, 3) arrays; a single grey channel is
    replicated three times. Any other shape raises ``ValueError``.
    """
    if img.ndim == 2:
        # Grayscale (H, W) -> (H, W, 3)
        img = np.stack([img] * 3, axis=-1)
    elif img.ndim == 3 and img.shape[2] == 1:
        # Grayscale with explicit channel axis (H, W, 1) -> (H, W, 3)
        img = np.concatenate([img] * 3, axis=-1)
    elif not (img.ndim == 3 and img.shape[2] == 3):
        raise ValueError(f"Unexpected image shape: {img.shape}")

    if img.dtype != np.uint8:
        img = img.astype(np.uint8)
    return img


def compute_training_mean(dataset, target_size=227, batch_size=100):
    """Compute the per-channel mean pixel value (0-255 scale) of a dataset.

    Every image is converted to RGB, resized to ``target_size`` x ``target_size``
    with bilinear interpolation, and its pixels are accumulated per channel.

    Args:
        dataset: object supporting ``len()``, slicing, and
            ``.image.numpy(aslist=True)`` on a slice (as used below).
        target_size: side length, in pixels, the images are resized to.
        batch_size: number of images fetched from ``dataset`` per iteration.

    Returns:
        numpy.ndarray of shape (3,): mean R, G and B values.

    Raises:
        ValueError: if ``batch_size`` is not positive, if no pixels were read
            (empty dataset), or if an image has an unsupported shape.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    total_sum = np.zeros(3)
    total_pixels = 0

    for start in tqdm(range(0, len(dataset), batch_size), desc="Computing mean"):
        # Load as a list of arrays because images may differ in shape
        image_list = dataset[start:start + batch_size].image.numpy(aslist=True)

        for img in image_list:
            rgb = _as_rgb_uint8(img)

            # An (H, W, 3) uint8 array is read as an RGB image; no explicit mode needed
            pil_img = Image.fromarray(rgb)
            resized = pil_img.resize((target_size, target_size), Image.BILINEAR)
            resized_np = np.array(resized)  # (target_size, target_size, 3)

            # Accumulate the channel sums over height and width
            total_sum += resized_np.sum(axis=(0, 1))
            total_pixels += resized_np.shape[0] * resized_np.shape[1]

    # Without this guard an empty dataset would silently return NaNs (0 / 0)
    if total_pixels == 0:
        raise ValueError("Cannot compute the mean of an empty dataset")

    mean_rgb = total_sum / total_pixels
    return mean_rgb

In [41]:
# Calculate the per-channel mean on the training split only, then convert the
# NumPy result to a plain Python list
train_mean_array = compute_training_mean(train_ds, target_size=227)
train_mean_rgb = train_mean_array.tolist()
print(f"Training mean (RGB): {train_mean_rgb}")

Computing mean: 100%|██████████| 300/300 [54:50<00:00, 10.97s/it] 

Training mean (RGB): [123.56455408928306, 114.61865774935796, 100.27033949232471]





In [49]:
#Function for alexnet transformer
def _pil_to_rgb(pil_img):
    """Convert a PIL image to 3-channel RGB (grayscale inputs are expanded)."""
    return pil_img.convert("RGB")


def _rescale_to_255(tensor):
    """Undo the 0-1 scaling applied by ToTensor, restoring the 0-255 range."""
    return tensor * 255.0


def alexnet_transform(mean_rgb, target_size=227):
    """Build the AlexNet-style preprocessing pipeline.

    The returned transform:
      - converts the numpy array (H, W, C) to a PIL image
      - forces 3 RGB channels, so grayscale images match the 3-value mean
        (compute_training_mean applies the same grayscale -> RGB expansion)
      - resizes the smaller edge to ``target_size``
      - center-crops to ``target_size`` x ``target_size``
      - converts to a tensor (0-1) and scales back to 0-255
      - subtracts the per-channel mean; no std scaling

    Args:
        mean_rgb: sequence of three per-channel means on the 0-255 scale.
        target_size: output side length in pixels.

    Returns:
        A ``transforms.Compose`` instance.
    """
    # Named helpers are used instead of lambdas because lambdas cannot be pickled.
    return transforms.Compose([
        transforms.ToPILImage(),  # numpy array (H, W, C) -> PIL
        transforms.Lambda(_pil_to_rgb),  # guarantee 3 channels before Normalize
        transforms.Resize(target_size),  # smaller edge -> target_size
        transforms.CenterCrop(target_size),  # target_size x target_size
        transforms.ToTensor(),  # (C, H, W) float tensor in 0-1
        transforms.Lambda(_rescale_to_255),  # back to 0-255
        transforms.Normalize(
            mean=mean_rgb,  # subtract the training mean
            std=[1.0, 1.0, 1.0],  # no std scaling
        ),
    ])

In [51]:
#Custom dataset wrapper for Deeplake with AlexNet preprocessing; optionally remaps original labels (e.g. to 0-99) through label_map
class AlexNetDataset(Dataset):
    """A PyTorch Dataset wrapper for DeepLake datasets, compatible with AlexNet preprocessing.

    Args:
        ds: indexable dataset whose items expose ``.image.numpy()`` and
            ``.label.numpy()``.
        transform: optional callable applied to the image array.
        label_map: optional mapping from original label to new label, e.g.
            ``old_to_new_label`` to obtain contiguous ids 0-99. When ``None``
            the original label is returned unchanged.
    """

    def __init__(self, ds, transform=None, label_map=None):
        self.ds = ds
        self.transform = transform
        self.label_map = label_map

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.ds)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at ``idx``.

        Raises:
            KeyError: if ``label_map`` is set and does not contain the label.
        """
        sample = self.ds[idx]
        image = sample.image.numpy()
        # numpy() must be called before .item(); the single-element label array
        # is then unwrapped into a plain Python int
        label = int(sample.label.numpy().item())

        if self.label_map is not None:
            label = self.label_map[label]

        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [53]:
# Build one preprocessing pipeline per split; all of them subtract the mean
# computed on the training split
train_transform, val_transform, test_transform = (
    alexnet_transform(train_mean_rgb) for _ in range(3)
)

# Wrap each deeplake split together with its pipeline
train_dataset = AlexNetDataset(train_ds, transform=train_transform)
val_dataset = AlexNetDataset(val_ds, transform=val_transform)
test_dataset = AlexNetDataset(test_ds, transform=test_transform)

# Verify the wrapped dataset sizes
for split_name, wrapped in (
    ("Training", train_dataset),
    ("Validation", val_dataset),
    ("Test", test_dataset),
):
    print(f"Samples in {split_name} dataset : {len(wrapped)}")

Samples in Training dataset : 30000
Samples in Validation dataset : 10000
Samples in Test dataset : 10000
