In [39]:
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import importlib

from convnet.struct.loss import SoftmaxCrossEntropyLoss

project_root = os.path.abspath('/Users/subhojit/workspace/saturn/src')
if project_root not in sys.path:
    sys.path.append(project_root)
import convnet.struct
from convnet.data_loader import CIFAR_10_DataLoader
from convnet.struct.layers import Linear, ReLU, Model, BatchNorm1d

importlib.reload(convnet.struct.layers)

%matplotlib inline

In [45]:
def random_crop_and_flip(flat_image, crop_size=32, padding=4):
    image = flat_image.reshape(3, 32, 32)

    image = np.transpose(image, (1, 2, 0))

    padded_image = np.pad(image, ((padding, padding), (padding, padding), (0, 0)), mode='constant')

    top = np.random.randint(0, 2 * padding)
    left = np.random.randint(0, 2 * padding)
    cropped = padded_image[top:top+crop_size, left:left+crop_size, :]

    if np.random.rand() < 0.5:
        cropped = np.fliplr(cropped)

    cropped = np.transpose(cropped, (2, 0, 1))

    return cropped.reshape(-1)

In [46]:
file_directory = '/Users/subhojit/Downloads/cifar-10-batches-py'
cdl = CIFAR_10_DataLoader()
xtrain_data, ytrain_data, Xtest, ytest = cdl.load_cifar_10_dataset(file_directory)

xtrain_data = xtrain_data.astype('float32') / 255.0
Xtest = Xtest.astype('float32') / 255.0

# np.random.shuffle(Xtrain)
n1 = int(0.8 * len(xtrain_data))
Xtrain = xtrain_data[:n1]
ytrain = ytrain_data[:n1]
Xdev = xtrain_data[n1:]
ydev = ytrain_data[n1:]

num_classes = len(set(ytrain))

In [47]:
# data preparation
np.random.seed(231)
std_dev = 1e-3
n_hidden = 100
weight_decay = 1e-4 # regularization

layers = [
    Linear(3072, n_hidden, have_bias=False, std_dev=std_dev, weight_decay=weight_decay),
    BatchNorm1d(n_hidden),
    ReLU(),
    # Linear(n_hidden, n_hidden, have_bias=False, std_dev=std_dev, weight_decay=weight_decay),
    # BatchNorm1d(n_hidden),
    # ReLU(),
    Linear(n_hidden, n_hidden, have_bias=False, std_dev=std_dev, weight_decay=weight_decay),
    BatchNorm1d(n_hidden),
    ReLU(),
    Linear(n_hidden, num_classes, have_bias=False, std_dev=std_dev, weight_decay=weight_decay)
]

model = Model(layers)
loss_criteria = SoftmaxCrossEntropyLoss()

params = [p for layer in layers for p in layer.parameters()]

print(sum(par.size for par in params))

318600


In [48]:
max_iterations = 10000
batch_size = 128
lossi = []
Hs = []

for i in range(max_iterations):

    #mini batch
    ix = np.random.randint(0, Xtrain.shape[0], (batch_size,))
    Xb, Yb = Xtrain[ix], ytrain[ix]
    Xbatch_aug = np.array([random_crop_and_flip(img) for img in Xb])
    Xbatch_aug = Xbatch_aug.reshape(Xbatch_aug.shape[0], -1)

    #farward pass
    x = Xbatch_aug
    logits = model.forward(x)
    data_loss = loss_criteria.forward(logits, Yb)
    reg_loss = loss_criteria.l2_regularization(model, 1e-4)
    loss = data_loss + reg_loss
    lossi.append(loss)

    #backward pass
    logit_grad = loss_criteria.backward()
    model.backward(logit_grad)

    #param update
    lr = 0.1
    model.update_param(lr)

    if i % 100 == 0:
        print(f"loss: {loss:.4f}")

loss: 2.3022
loss: 1.9651
loss: 1.8486
loss: 1.8516
loss: 1.6233
loss: 1.5990
loss: 1.5339
loss: 1.4189
loss: 1.5226
loss: 1.5736
loss: 1.3911
loss: 1.4848
loss: 1.4304
loss: 1.6856
loss: 1.4702
loss: 1.4208
loss: 1.5312
loss: 1.6363
loss: 1.4840
loss: 1.3871
loss: 1.5261
loss: 1.5978
loss: 1.5132
loss: 1.4527
loss: 1.5880
loss: 1.3296
loss: 1.5343
loss: 1.3188
loss: 1.4898
loss: 1.3793
loss: 1.3686
loss: 1.5023
loss: 1.2423
loss: 1.4257
loss: 1.5395
loss: 1.3478
loss: 1.3906
loss: 1.3387
loss: 1.5277
loss: 1.4182
loss: 1.2422
loss: 1.4611
loss: 1.3370
loss: 1.4391
loss: 1.3964
loss: 1.2113
loss: 1.3632
loss: 1.3386
loss: 1.3101
loss: 1.2646
loss: 1.4768
loss: 1.3548
loss: 1.3525
loss: 1.4008
loss: 1.4479
loss: 1.3497
loss: 1.2786
loss: 1.4238
loss: 1.4043
loss: 1.4349
loss: 1.2908
loss: 1.1472
loss: 1.2260
loss: 1.2990
loss: 1.4005
loss: 1.3257
loss: 1.2363
loss: 1.2476
loss: 1.3522
loss: 1.1855
loss: 1.4287
loss: 1.4927
loss: 1.3730
loss: 1.2734
loss: 1.3179
loss: 1.3133
loss: 1.2343

In [44]:
Xb[0].shape

(3072,)

In [49]:
def split_loss(split):
    x, y = {
        'train': (Xtrain, ytrain),
        'dev': (Xdev, ydev),
        'test': (Xtest, ytest),
    }[split]
    for layer in layers:
        if isinstance(layer, BatchNorm1d):
            layer.train = False
        x = layer.forward(x)
    logits = x
    loss = loss_criteria.forward(logits, y)
    print(f"{split} => loss: {loss:.4f}")

split_loss('train')
split_loss('dev')

train => loss: 1.2107
dev => loss: 1.2835


In [50]:
split_loss('test')

test => loss: 1.2823


In [51]:
def accuracy(x, labels):
    for layer in layers:
        if isinstance(layer, BatchNorm1d):
            layer.train = False
        x = layer.forward(x)
    logits = x
    probs = loss_criteria.softmax_numpy(logits)
    preds = np.argmax(probs, axis=1)
    return np.mean(preds == labels)

accuracy(Xtest, ytest)

np.float64(0.5448)

# Note
Here at the beginning I have lots of layers which was overfitting the training data. That is it is memorizing the training data and not generaizing properly. That reflects in the training loss and dev loss being far apart. I got a training loss of 0.6 and dev loss of 2.2 and test loss at similar scale with dev loss and getting accuracy of ~51% on testset.
Then introducing l2 regularization does not fix the problem completely. When I added horizontal flipping, cropping the images , the overfitting stopped and train and dev loss came to similar scale.

