In [54]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so edits to imported code (e.g. the `dl` package)
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Imports.

In [55]:
import numpy as np

from dl import Module # Always import this if you wish to build your own Module using existing Modules. 

from dl.modules import Linear, ReLU # Typical modules found in a multilayer perceptron.

from dl.optimizers import SGD # We need this to update model weights.

from dl.data import BatchLoader, train_val_split, accuracy # We will pass our training data into a BatchLoader, similar to the PyTorch DataLoader.
from dl.data.transforms import ToVariable, ToFloat, Normalize, ComposeTransforms # The transformations done to raw numpy image and label instances, before being passed into the model.

from dl.functions import cross_entropy_loss # We need this to evaluate the quality of our current model weights with respect to a batch.

from dl import Variable # The datatype used by this framework, which wraps NumPy arrays. It is within Variables, and specifically the .node attribute, that autograd takes place. 

# For visualizing output
from tqdm import tqdm

# For downloading MNIST
from sklearn.datasets import fetch_openml
import os

## Preparing a toy dataset (MNIST).

In [56]:
# Directory in which scikit-learn caches the downloaded dataset (optional).
data_home = os.path.join("data", "mnist")

# Fetch MNIST (70000 samples) from OpenML as plain arrays rather than a DataFrame.
mnist = fetch_openml('mnist_784', version=1, data_home=data_home, as_frame=False)

# Features: X is (70000, 784). Targets: y is (70000,), cast to integer class ids.
X = mnist.data
y = mnist.target.astype(int)

print(f"Data shape: {X.shape}")
print(f"Labels shape: {y.shape}")

# The first 60000 rows feed training/validation; the remainder is held out for testing.
X_train_full = X[:60000]
y_train_full = y[:60000]

# NOTE(review): the third argument (0.1) is presumably the validation fraction;
# confirm against dl.data.train_val_split.
X_train, y_train, X_val, y_val = train_val_split(X_train_full, y_train_full, 0.1)

X_test = X[60000:]
y_test = y[60000:]

Data shape: (70000, 784)
Labels shape: (70000,)


In [57]:
# Define image transforms (division by 255.0 and normalization)

# Compute normalization statistics from the training split ONLY.
# X_train_full still contains the rows that train_val_split assigned to the
# validation set; deriving mean/std from it would leak validation data into the
# preprocessing, so the statistics are taken from X_train instead.
X_for_stats = np.asarray(X_train, dtype=np.float32) / 255.0
mean = np.mean(X_for_stats)  # single scalar over all pixels of all training images
std = np.std(X_for_stats)    # single scalar over all pixels of all training images

# The same training-derived statistics are applied to every split (train/val/test).
image_transforms = ComposeTransforms([ToFloat(255.0), Normalize(mean, std), ToVariable()])
label_transforms = ToVariable()

In [58]:
# Define the dataset and batchloaders.

class MNIST:
    """Indexable dataset that pairs each image with its label.

    Args:
        images: Indexable container of images exposing a ``shape`` attribute
            whose first entry is the number of samples.
        labels: Indexable container of labels, addressed with the same
            indices as ``images``.
        image_transforms: Optional callable applied to an image on access.
        label_transforms: Optional callable applied to a label on access.
    """

    def __init__(self, images, labels, image_transforms=None, label_transforms=None):
        super().__init__()

        self.images, self.labels = images, labels
        self.image_transforms, self.label_transforms = image_transforms, label_transforms

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at ``idx`` after any transforms."""
        # Look up both raw entries first, then transform each one if requested.
        sample, target = self.images[idx], self.labels[idx]

        if self.image_transforms is not None:
            sample = self.image_transforms(sample)
        if self.label_transforms is not None:
            target = self.label_transforms(target)

        return sample, target

    def __len__(self):
        """Number of samples, taken from the leading dimension of ``images``."""
        n_samples = self.images.shape[0]
        return n_samples
    
BATCH_SIZE = 256

# One loader per split, all sharing the same transforms and batch size.
# Only the training loader shuffles its samples.
batchloaders = {
    'train': BatchLoader(
        MNIST(X_train, y_train, image_transforms, label_transforms),
        batch_size=BATCH_SIZE,
        shuffle=True,
    ),
    'val': BatchLoader(
        MNIST(X_val, y_val, image_transforms, label_transforms),
        batch_size=BATCH_SIZE,
        shuffle=False,
    ),
    'test': BatchLoader(
        MNIST(X_test, y_test, image_transforms, label_transforms),
        batch_size=BATCH_SIZE,
        shuffle=False,
    ),
}

## Model definition and training loop.

In [59]:
# Defining a toy model.
class MLP(Module):
    """Toy multilayer perceptron: 784 -> 128 -> 64 -> 10.

    Three linear layers with a ReLU after each of the first two; the output of
    the last linear layer is returned without any activation.
    """

    def __init__(self):
        super().__init__()

        # Submodules are assigned as attributes of the module.
        # First hidden block.
        self.lin1 = Linear(784, 128)
        self.relu1 = ReLU()
        # Second hidden block.
        self.lin2 = Linear(128, 64)
        self.relu2 = ReLU()
        # Output layer: one unit per digit class.
        self.lin3 = Linear(64, 10)

    def forward(self, X):
        """Run ``X`` through the layers in order and return the final layer's output."""
        # Chaining the submodule calls is where the framework's autograd gets involved.
        hidden = self.relu1(self.lin1(X))
        hidden = self.relu2(self.lin2(hidden))
        return self.lin3(hidden)

In [60]:
# Build an instance of the network in order to inspect its architecture.
model = MLP()
model.print() # Module provides a print method that lists the model's submodules.

MLP
    lin1 : Linear(input_size = 784, output_size = 128, param_init = 'xavier')
    relu1 : ReLU
    lin2 : Linear(input_size = 128, output_size = 64, param_init = 'xavier')
    relu2 : ReLU
    lin3 : Linear(input_size = 64, output_size = 10, param_init = 'xavier')


In [61]:
# Create the model that will be trained and hand its parameters to the optimizer.
model = MLP()
optimizer = SGD(
    model.parameters(),
    learning_rate=0.01,
    weight_decay=1e-4,
    momentum=0.9,
)

In [62]:
epochs = 10

# Per-epoch history: running loss total / samples seen, and accuracy total / samples seen.
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

for epoch in range(epochs):

    print(f"\nEpoch {epoch + 1}/{epochs}")

    # ---------------- Training pass ----------------
    train_loss = train_correct = train_total = 0
    model.enable_grad()
    pbar = tqdm(batchloaders['train'], desc="Training", leave=False)
    for X_batch, y_batch in pbar:

        # Forward pass: model outputs and the loss for this batch.
        features = model(X_batch)
        loss = cross_entropy_loss(features, y_batch)

        # Backward pass: reset old gradients, backpropagate, then step the optimizer.
        optimizer.clear_grad()
        loss.backward()
        optimizer.update_parameters()

        # Scale the batch loss by the batch size before accumulating, so dividing by
        # the sample count later weights every sample equally.
        # NOTE(review): this assumes the loss is a per-batch mean and that accuracy()
        # returns a count of correct predictions -- confirm against dl.
        n_samples = len(y_batch.data)
        train_loss += loss.data * n_samples
        train_correct += accuracy(features, y_batch)
        train_total += n_samples

        # Show the latest batch loss on the progress bar.
        pbar.set_postfix(loss=loss.data)

    train_losses.append(train_loss / train_total)
    train_accuracies.append(train_correct / train_total)

    # ---------------- Validation pass ----------------
    val_loss = val_correct = val_total = 0
    model.disable_grad()  # no parameter updates happen in this pass
    for X_batch, y_batch in tqdm(batchloaders['val'], desc="Validating", leave=False):

        features = model(X_batch)
        loss = cross_entropy_loss(features, y_batch)

        n_samples = len(y_batch.data)
        val_loss += loss.data * n_samples
        val_correct += accuracy(features, y_batch)
        val_total += n_samples

    val_losses.append(val_loss / val_total)
    val_accuracies.append(val_correct / val_total)
    print(f"Train Loss: {train_losses[-1]:.4f} | Val Loss: {val_losses[-1]:.4f}")



Epoch 1/10


                                                                        

Train Loss: 0.4316 | Val Loss: 0.2077

Epoch 2/10


                                                                         

Train Loss: 0.1843 | Val Loss: 0.1611

Epoch 3/10


                                                                         

Train Loss: 0.1386 | Val Loss: 0.1349

Epoch 4/10


                                                                         

Train Loss: 0.1116 | Val Loss: 0.1100

Epoch 5/10


                                                                         

Train Loss: 0.0927 | Val Loss: 0.1058

Epoch 6/10


                                                                         

Train Loss: 0.0788 | Val Loss: 0.0996

Epoch 7/10


                                                                         

Train Loss: 0.0683 | Val Loss: 0.0940

Epoch 8/10


                                                                         

Train Loss: 0.0607 | Val Loss: 0.0895

Epoch 9/10


                                                                         

Train Loss: 0.0534 | Val Loss: 0.0911

Epoch 10/10


                                                                         

Train Loss: 0.0468 | Val Loss: 0.0874




In [63]:
# Evaluate the trained model on the held-out test split.
test_loss = test_correct = test_total = 0
model.disable_grad()  # evaluation only: no parameter updates happen here
for X_batch, y_batch in batchloaders['test']:

    features = model(X_batch)
    loss = cross_entropy_loss(features, y_batch)

    # Scale the batch loss by the batch size before accumulating, so the final
    # division by the sample count weights every sample equally.
    n_samples = len(y_batch.data)
    test_loss += loss.data * n_samples
    test_correct += accuracy(features, y_batch)
    test_total += n_samples

print(f"Test Loss: {(test_loss / test_total):.4f} | Test Accuracy: {(test_correct / test_total):.4f}")

Test Loss: 0.0819 | Test Accuracy: 0.9735
