In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np

## Preparing a toy dataset (MNIST).

In [3]:
# Extracting data directly from csv.

mnist_train = np.genfromtxt('./data/mnist_train.csv', delimiter=',')
mnist_test = np.genfromtxt('./data/mnist_test.csv', delimiter=',')

# Splitting the data into features and labels.

X_train = mnist_train[1:,1:]
y_train = mnist_train[1:,0].astype(int)

X_test = mnist_test[1:,1:]
y_test = mnist_test[1:,0].astype(int)

# Splitting off a part of training set for validation.

N_train = y_train.shape[0]
order = np.random.permutation(N_train)
split_index = int(N_train*0.8)


X_valid = X_train[order][split_index:]
X_train = X_train[order][:split_index]
y_valid = y_train[order][split_index:]
y_train = y_train[order][:split_index]

# Squeezing the data into [0,1], which improves training.

X_train /= 255
X_valid /= 255
X_test /= 255

## Model creation and training loop.

In [4]:
from dl import Module # Always import this if you wish to build your own Module using existing Modules. 
from dl.modules import Linear, ReLU # Typical modules in a multilayer perceptron.

In [5]:
# Defining a toy model.
class MLP(Module):

    def __init__(self):
        super().__init__()

        # Assign your submodules here as attributes.
        self.lin1 = Linear(784, 128)
        self.relu1 = ReLU()
        self.lin2 = Linear(128, 64)
        self.relu2 = ReLU()
        self.lin3 = Linear(64, 10)

    def forward(self, X):

        # Use your submodules to compute something for forward. Autograd happens here.
        X = self.lin1(X)
        X = self.relu1(X)
        X = self.lin2(X)
        X = self.relu2(X)
        X = self.lin3(X)

        return X

In [6]:
model = MLP()
model.print() # There is a print method to view your model. 

MLP
    lin1 : Linear(input_size = 784, output_size = 128, param_init = 'xavier')
    relu1 : ReLU
    lin2 : Linear(input_size = 128, output_size = 64, param_init = 'xavier')
    relu2 : ReLU
    lin3 : Linear(input_size = 64, output_size = 10, param_init = 'xavier')


In [7]:
from dl.optimizers import SGD # We need this to update model weights, aka train the model.
optimizer = SGD(model.parameters(), 0.01)

In [8]:
from dl import Variable # The datatype passed into any Module or Function must be a Variable. It is within Variables, and specifically the .node attribute, that autograd takes place. 

X_train = Variable(X_train)
X_valid = Variable(X_valid)
X_test = Variable(X_test)

y_train = Variable(y_train)
y_valid = Variable(y_valid)
y_test = Variable(y_test)

In [9]:
from dl.data.iterate import iterate_batches # Passing in our training data into iterate_batches gives us the batches we need for each loss and gradient calculation.
from dl.functions import cross_entropy_loss # We need this to evaluate the quality of our current model weights with respect to a batch.


epochs = 5
for epoch in range(epochs):

    epoch_loss = 0
    for X_batch, y_batch in iterate_batches(X_train, y_train, batch_size=256, shuffle=True):

        features = model(X_batch)
        loss = cross_entropy_loss(features, y_batch)
        loss.backward() # Calculate the gradient (aka, the direction in which to change model weights in order to lower the current loss, as suggested by this immediate batch).

        epoch_loss += loss.data

        optimizer.update_parameters()
        optimizer.clear_grad()


    print("Epoch loss:", epoch_loss/epochs)


Epoch loss: 72.14921462103425
Epoch loss: 39.696954810601106
Epoch loss: 24.77514213025257
Epoch loss: 19.35460897137209
Epoch loss: 16.75968445122756
