In [24]:
import os

import numpy as np
import matplotlib.pyplot as plt

from data_loader import CIFAR_10_DataLoader
import losses
%matplotlib inline

In [63]:
# Directory holding the extracted CIFAR-10 python batches. Set the CIFAR10_DIR
# environment variable to point at a different location; when it is unset the
# original local path is used, so existing behaviour is unchanged.
file_directory = os.environ.get('CIFAR10_DIR', '/Users/subhojit/Downloads/cifar-10-batches-py')
cdl = CIFAR_10_DataLoader()
Xtrain, ytrain, Xtest, ytest = cdl.load_cifar_10_dataset(file_directory)

# Cast to float32 and divide by 255.
# NOTE(review): presumably the loader returns 8-bit pixel values, so this maps
# them into [0, 1] — confirm against data_loader.
Xtrain = Xtrain.astype('float32') / 255.0
Xtest = Xtest.astype('float32') / 255.0



In [26]:
# Number of distinct labels present in the training set.
num_classes = len(np.unique(ytrain))

In [64]:
# Parameter initialisation and mini-batch sampling for a two-layer network.

np.random.seed(231)  # fixed seed so the weights and the sampled batch are reproducible
std_dev = 1e-2       # scale applied to the standard-normal initial weights
input_dim = Xtrain.shape[1]  # feature count per example (3072 for the data shown in this notebook)
hidden_dim = 100
batch_size = 32

# The order of the random draws below is kept as-is so the seeded values do not change.
W1 = np.random.randn(input_dim, hidden_dim) * std_dev
b1 = np.zeros(hidden_dim)
W2 = np.random.randn(hidden_dim, num_classes) * std_dev
# Sized from num_classes (not a literal 10) so the bias always matches W2's output width.
b2 = np.zeros(num_classes)

# Sample a mini-batch of row indices uniformly, with replacement.
ix = np.random.randint(0, Xtrain.shape[0], (batch_size,))
Xb, Yb = Xtrain[ix], ytrain[ix]
Xb.shape, Yb.shape, W1.shape, b1.shape, W2.shape, b2.shape

((32, 3072), (32,), (3072, 100), (100,), (100, 10), (10,))

In [65]:
# Display the sampled mini-batch to eyeball the scaled pixel values.
Xb

array([[0.9137255 , 0.8980392 , 0.92941177, ..., 0.2901961 , 0.29411766,
        0.29803923],
       [0.6666667 , 0.6627451 , 0.67058825, ..., 0.3529412 , 0.34901962,
        0.34901962],
       [0.78039217, 0.74509805, 0.8117647 , ..., 0.5058824 , 0.49803922,
        0.46666667],
       ...,
       [0.3137255 , 0.5137255 , 0.5372549 , ..., 0.02745098, 0.        ,
        0.        ],
       [0.972549  , 0.6117647 , 0.18039216, ..., 0.4509804 , 0.4862745 ,
        0.5254902 ],
       [0.33333334, 0.32156864, 0.31764707, ..., 0.33333334, 0.30980393,
        0.32156864]], dtype=float32)

In [74]:
# Forward pass: two affine layers applied back to back.
# NOTE(review): no activation is applied to h1, so the two layers compose into
# a single linear map — confirm this is intentional.
h1 = Xb @ W1 + b1
scores = h1 @ W2 + b2
# Keep references to the current parameter arrays in one list.
parameters = [W1, b1, W2, b2]

In [67]:
def softmax_loss(scores, y):
    """Mean softmax cross-entropy loss over a batch.

    Args:
        scores: 2-D array of raw class scores, one row per example.
        y: integer class indices, one per row of ``scores``.

    Returns:
        The average over rows of ``-log(p_correct + 1e-12)``, where
        ``p_correct`` is the softmax probability of the labelled class.
    """
    tiny = 1e-12  # added inside the log so a zero probability does not give -inf
    n_rows = scores.shape[0]

    # Subtract each row's maximum before exponentiating to avoid overflow;
    # the softmax output is unaffected by this shift.
    row_max = np.max(scores, axis=1, keepdims=True)
    unnormalised = np.exp(scores - row_max)
    normaliser = np.sum(unnormalised, axis=1, keepdims=True)
    probabilities = unnormalised / normaliser

    # Pick out, for every row, the probability assigned to its true class.
    p_correct = probabilities[np.arange(n_rows), y]
    return np.mean(-np.log(p_correct + tiny))

In [75]:
# Loss on the sampled mini-batch. With near-zero weights the predictions are
# close to uniform, so a value near ln(num_classes) is expected.
softmax_loss(scores, Yb)

np.float64(2.2999197773618327)

In [69]:
def softmax_numpy(x, axis=1):
    """Softmax of ``x`` along ``axis``.

    Args:
        x: array of scores.
        axis: axis along which the outputs are normalised to sum to 1
            (default 1, i.e. per row of a 2-D array).

    Returns:
        Array with the same shape as ``x`` holding the softmax values.
    """
    # Shift by the maximum along the axis so np.exp cannot overflow.
    peak = np.max(x, axis=axis, keepdims=True)
    weights = np.exp(x - peak)
    total = np.sum(weights, axis=axis, keepdims=True)
    return weights / total

In [73]:
# Backward pass followed by one gradient-descent step.
# NOTE: the parameter arrays are updated in place, so every re-run of this cell
# applies another step; `h1` and `scores` stay stale until the forward pass is re-run.

# Gradient of the mean softmax cross-entropy w.r.t. the scores:
# softmax probabilities, minus 1 at each example's true class, divided by the batch size.
dscores = softmax_numpy(scores)
dscores[np.arange(len(Xb)), Yb] -= 1
dscores /= len(Xb)

# Second affine layer.
dW2 = h1.T @ dscores
db2 = dscores.sum(axis=0)
# Gradient flowing into h1; the forward pass applies no activation to h1,
# so it is propagated unchanged into the first layer.
dh1 = dscores @ W2.T
# First affine layer.
dW1 = Xb.T @ dh1
db1 = dh1.sum(axis=0)

grads = [dW1, db1, dW2, db2]
print(grads)

# Gradient-descent update with a fixed learning rate, applied in place.
lr = 0.01
W1 -= lr * dW1
b1 -= lr * db1
W2 -= lr * dW2
b2 -= lr * db2



[array([[ 0.00053936, -0.00066423,  0.001845  , ...,  0.0001863 ,
         0.00104582,  0.00083819],
       [ 0.00019935, -0.00054903,  0.0017557 , ...,  0.00013418,
         0.001498  ,  0.00091838],
       [ 0.00018162, -0.00041366,  0.00178251, ...,  0.00021925,
         0.00156347,  0.00089997],
       ...,
       [ 0.00082111, -0.00021791,  0.00111135, ..., -0.0004479 ,
         0.0005593 ,  0.00042885],
       [ 0.00092018, -0.00020271,  0.00119189, ..., -0.00036318,
         0.00035353,  0.00041855],
       [ 0.00087888, -0.00023753,  0.00121286, ..., -0.00030596,
         0.00026504,  0.00037476]]), array([ 6.11455305e-04,  1.74624717e-04,  7.65470961e-04, -1.14808240e-03,
        4.54504214e-04, -3.49436562e-04,  2.35263095e-03, -6.55334011e-04,
       -6.56899164e-05, -1.34966095e-03,  6.99713931e-04, -6.57724022e-05,
       -1.08860946e-03, -6.04056182e-04,  2.35362115e-04, -1.06549580e-03,
       -6.75010571e-04,  6.81509590e-04,  3.19593459e-04,  7.12263964e-04,
        3.