In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import importlib

from convnet.struct.loss import SoftmaxCrossEntropyLoss

project_root = os.path.abspath('/Users/subhojit/workspace/saturn/src')
if project_root not in sys.path:
    sys.path.append(project_root)
import convnet.struct
from convnet.data_loader import CIFAR_10_DataLoader
from convnet.struct.layers import Linear, ReLU, Model, BatchNorm1d
from convnet.struct.conv_layer import SimpleConvNet, MaxPoolingConvNet

importlib.reload(convnet.struct.layers)

%matplotlib inline

In [2]:
# Local directory holding the CIFAR-10 python batches.
file_directory = '/Users/subhojit/Downloads/cifar-10-batches-py'
cdl = CIFAR_10_DataLoader()
xtrain_data, ytrain_data, Xtest, ytest = cdl.load_cifar_10_dataset(file_directory)

# View every sample as a 32x32x3 array, cast to float32 and divide by 255.
# NOTE(review): the raw CIFAR-10 python batches store each image channel-major
# (1024 R, then 1024 G, then 1024 B values). If the loader returns that raw
# layout, reshape(-1, 32, 32, 3) scrambles the channels and
# reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1) would be needed instead --
# confirm what CIFAR_10_DataLoader actually returns.
xtrain_data = xtrain_data.reshape(-1, 32, 32, 3).astype('float32') / 255.0
Xtest = Xtest.reshape(-1, 32, 32, 3).astype('float32') / 255.0

# First 80% of the training arrays is used for training, the remainder as a
# dev set. No shuffling is done here, so the split follows the loader's order.
# NOTE(review): if shuffling is added, images and labels must be permuted
# together, not the images alone.
n1 = int(len(xtrain_data) * 0.8)
Xtrain, Xdev = xtrain_data[:n1], xtrain_data[n1:]
ytrain, ydev = ytrain_data[:n1], ytrain_data[n1:]

# Number of distinct labels present in the training split.
num_classes = len(set(ytrain))


In [3]:
# Passed as `depth` to the first conv layer of GolemCNN.
image_depth = 3
# NOTE(review): despite the "kernel_size" name, these two look like channel /
# kernel counts rather than spatial sizes: the first is used as the `depth` of
# the second conv layer, the second is passed as `kernel_size` to the second
# conv layer and multiplies 5 * 5 to form the fully connected fan-in -- confirm
# against SimpleConvNet.
first_layer_kernel_size = 6
second_layer_kernel_size = 10

In [6]:
class GolemCNN:
    """Two conv/ReLU/max-pool stages followed by one linear classifier.

    Layer sizes come from the notebook-level constants ``image_depth``,
    ``first_layer_kernel_size``, ``second_layer_kernel_size`` and
    ``num_classes``, which must be defined before the class is instantiated.
    """

    def __init__(self):
        """Build all layers; takes no arguments and reads the notebook constants."""
        # Use the shared constant (not a literal) so conv1's kernel_size always
        # matches the `depth` that conv2 is built with below.
        self.conv1 = SimpleConvNet(kernel_size=first_layer_kernel_size, depth=image_depth, spatial_dim=5)
        self.relu1 = ReLU()
        self.maxpool1 = MaxPoolingConvNet(spatial_dim=2, stride=2)

        self.conv2 = SimpleConvNet(kernel_size=second_layer_kernel_size, depth=first_layer_kernel_size, spatial_dim=5)
        self.relu2 = ReLU()
        self.maxpool2 = MaxPoolingConvNet(spatial_dim=2, stride=2)

        # NOTE(review): the 5 * 5 factor assumes 32x32 inputs and unpadded,
        # stride-1 5x5 convolutions (32 -> 28 -> 14 -> 10 -> 5) -- confirm
        # against SimpleConvNet / MaxPoolingConvNet.
        fc_fan_in = second_layer_kernel_size * 5 * 5
        self.fc1 = Linear(fc_fan_in, num_classes)

    def forward(self, x):
        """Run the forward pass and return the output of the final linear layer.

        Args:
            x: input batch; the notebook feeds arrays reshaped to
               (batch, 32, 32, 3).

        Returns:
            Whatever ``self.fc1.forward`` returns for the flattened features.
        """
        out = self.conv1.forward(x)
        out = self.relu1.forward(out)
        out = self.maxpool1.forward(out)

        out = self.conv2.forward(out)
        out = self.relu2.forward(out)
        out = self.maxpool2.forward(out)

        # Flatten everything except the leading (batch) axis for the linear layer.
        out = out.reshape(out.shape[0], -1)
        out = self.fc1.forward(out)
        return out

    def backward(self, dout):
        """Propagate ``dout`` through the layers in reverse forward order.

        Args:
            dout: gradient with respect to the output of ``forward``.

        Returns:
            The value returned by ``self.conv1.backward``.
        """
        dout = self.fc1.backward(dout)
        # Undo the flatten done in forward(); relies on maxpool2 keeping its
        # last output in `pooled_out`.
        dout = dout.reshape(self.maxpool2.pooled_out.shape)
        dout = self.maxpool2.backward(dout)
        dout = self.relu2.backward(dout)
        dout = self.conv2.backward(dout)
        dout = self.maxpool1.backward(dout)
        dout = self.relu1.backward(dout)
        dout = self.conv1.backward(dout)
        return dout

    def update_parameters(self, learning_rate):
        """Ask every trainable layer to apply its update with ``learning_rate``."""
        self.conv1.update_parameters(learning_rate)
        self.conv2.update_parameters(learning_rate)
        # NOTE(review): Linear is driven through `update_param`, unlike the conv
        # layers' `update_parameters` -- confirm that is Linear's method name.
        self.fc1.update_param(learning_rate)

In [7]:
# Training configuration.
max_iterations = 1000
batch_size = 128
lr = 0.1      # constant learning rate, set once instead of on every iteration
seed = 42     # fixes the mini-batch sampling order so runs are repeatable
lossi = []    # loss recorded at every iteration
Hs = []

# Seed NumPy's global RNG before the model is built and batches are drawn.
np.random.seed(seed)

model = GolemCNN()
loss_criteria = SoftmaxCrossEntropyLoss()

# NOTE(review): the saved run printed a first loss of ~16.7, far above the
# ln(10) ~= 2.30 expected from near-uniform predictions over 10 classes (if
# num_classes is 10); that usually points at the weight-initialisation scale
# -- worth checking before running the full 1000 iterations.
for i in range(max_iterations):

    # Draw a mini-batch by sampling indices uniformly with replacement.
    ix = np.random.randint(0, Xtrain.shape[0], (batch_size,))
    Xb, Yb = Xtrain[ix], ytrain[ix]

    # Forward pass and loss.
    logits = model.forward(Xb)
    loss = loss_criteria.forward(logits, Yb)
    lossi.append(loss)

    # Backward pass, starting from the loss gradient w.r.t. the logits.
    logits_grad = loss_criteria.backward()
    model.backward(logits_grad)

    # Parameter update with the fixed learning rate.
    model.update_parameters(lr)

    if i % 100 == 0:
        print(f"loss: {loss}")


loss: 16.727725815225398


KeyboardInterrupt: 