In [1]:
# Load IPython's autoreload extension and select mode 2, which re-imports
# modules before each cell runs so that edits to imported source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Imports.

In [None]:
import numpy as np

from dl import Module # Always import this if you wish to build your own Module using existing Modules. 

from dl.modules import Linear, ReLU # Typical modules found in a multilayer perceptron.

from dl.optimizers import SGD # We need this to update model weights.

from dl.data import BatchLoader, train_val_split # We will pass our training data into a BatchLoader, similar to the PyTorch DataLoader.
from dl.data.transforms import ToVariable, ToFloat, Normalize, ComposeTransforms # The transformations done to raw numpy image and label instances, before being passed into the model.

from dl.functions import cross_entropy_loss # We need this to evaluate the quality of our current model weights with respect to a batch.

from dl import Variable # The datatype used by this framework, which wraps NumPy arrays. It is within Variables, and specifically the .node attribute, that autograd takes place. 

## Preparing a toy dataset (MNIST).

In [None]:
# Read both MNIST CSV files straight into 2-D NumPy arrays.
train_csv, test_csv = './data/mnist_train.csv', './data/mnist_test.csv'
mnist_train = np.genfromtxt(train_csv, delimiter=',')
mnist_test = np.genfromtxt(test_csv, delimiter=',')

# Row 0 of each array is discarded (presumably the CSV header line, which
# genfromtxt cannot parse as numbers -- confirm against the data files).
train_rows = mnist_train[1:]
test_rows = mnist_test[1:]

# Column 0 holds the label (cast to int); every remaining column is a feature.
X_train_full, y_train_full = train_rows[:, 1:], train_rows[:, 0].astype(int)
X_test, y_test = test_rows[:, 1:], test_rows[:, 0].astype(int)

In [None]:
# Normalization statistics: a single scalar mean and standard deviation taken
# over every value of the full training features after scaling them by 1/255.
X_for_stats = X_train_full.astype(np.float32) / 255.0
mean, std = X_for_stats.mean(), X_for_stats.std()

# NOTE(review): these statistics are for data divided by 255, so ToFloat is
# assumed to apply the same scaling before Normalize runs -- confirm.
image_pipeline = [ToFloat(), Normalize(mean, std), ToVariable()]
image_transforms = ComposeTransforms(image_pipeline)

# Labels only need to be wrapped by ToVariable.
label_transforms = ToVariable()

In [None]:
class MNIST:
    """Indexable dataset that pairs each image with its label.

    Args:
        images: Indexable collection of images exposing a ``.shape`` attribute
            whose first entry is the number of samples.
        labels: Indexable collection of labels, aligned with ``images``.
        image_transforms: Optional callable applied to an image on access.
        label_transforms: Optional callable applied to a label on access.
    """

    def __init__(self, images, labels, image_transforms=None, label_transforms=None):
        super().__init__()

        self.images, self.labels = images, labels
        self.image_transforms, self.label_transforms = image_transforms, label_transforms

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at ``idx`` with any configured transforms applied."""
        image, label = self.images[idx], self.labels[idx]

        # A transform left as None means the raw value is returned unchanged.
        image = image if self.image_transforms is None else self.image_transforms(image)
        label = label if self.label_transforms is None else self.label_transforms(label)

        return image, label

    def __len__(self):
        """Return the number of samples, read from the first dimension of ``images``."""
        sample_count = self.images.shape[0]
        return sample_count

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], shape=(48000, 784))

## Model definition and training loop.

In [5]:
# Defining a toy model.
class MLP(Module):
    """Toy multilayer perceptron: 784 -> 128 -> 64 -> 10 with a ReLU after each of the first two Linear layers."""

    def __init__(self):
        super().__init__()

        # Submodules are stored as attributes of the Module.
        self.lin1 = Linear(784, 128)
        self.relu1 = ReLU()
        self.lin2 = Linear(128, 64)
        self.relu2 = ReLU()
        self.lin3 = Linear(64, 10)

    def forward(self, X):
        """Feed ``X`` through each submodule in order and return the output of the last Linear layer."""
        # Each call goes through a submodule, which is where autograd happens.
        layers = (self.lin1, self.relu1, self.lin2, self.relu2, self.lin3)
        for layer in layers:
            X = layer(X)

        return X

In [6]:
# Instantiate the MLP class with its constructor defaults.
model = MLP()
model.print() # Prints the model's name followed by each submodule's attribute name and configuration.

MLP
    lin1 : Linear(input_size = 784, output_size = 128, param_init = 'xavier')
    relu1 : ReLU
    lin2 : Linear(input_size = 128, output_size = 64, param_init = 'xavier')
    relu2 : ReLU
    lin3 : Linear(input_size = 64, output_size = 10, param_init = 'xavier')


In [None]:
# SGD optimizer over whatever model.parameters() returns.
# NOTE(review): 0.01 is presumably the learning rate (second positional
# argument) -- confirm against dl.optimizers.SGD.
optimizer = SGD(model.parameters(), 0.01)

In [None]:
# Train for a fixed number of epochs and print the mean batch loss of each one.
epochs = 5
for epoch in range(epochs):

    epoch_loss = 0
    iterations = 0
    # NOTE(review): `iterate_batches`, `X_train` and `y_train` are not defined
    # in any cell visible here -- confirm they exist before this cell runs
    # (the imports suggest BatchLoader / train_val_split were intended).
    for X_batch, y_batch in iterate_batches(X_train, y_train, batch_size=256, shuffle=True):

        # Forward pass: compute features and the loss for this batch.
        features = model(X_batch)
        loss = cross_entropy_loss(features, y_batch)

        # Backward pass must come first: it calculates the gradient (aka, the
        # direction in which to change model weights in order to lower the
        # current loss, as suggested by this immediate batch).
        loss.backward()
        # Only now can the optimizer apply an update based on this batch's
        # gradients; calling it before backward() would update from nothing.
        optimizer.update_parameters()
        # Reset gradients last so they do not carry over into the next batch.
        optimizer.clear_grad()

        epoch_loss += loss.data
        iterations += 1


    print("Epoch loss:", epoch_loss/iterations)


Epoch loss: 1.760192656193778
Epoch loss: 0.9028898164895317
Epoch loss: 0.6118096185742777
Epoch loss: 0.5001693849202595
Epoch loss: 0.44010433171269275
