In [3]:
import numpy as np
import matplotlib.pyplot as plt

from convnet.before.two_layer_net import num_classes
from data_loader import CIFAR_10_DataLoader
%matplotlib inline

In [4]:
# Directory holding the CIFAR-10 python batches that the loader reads.
# NOTE(review): this is an absolute, machine-specific path, so the notebook will
# not run on another machine without editing this line; consider making the
# data directory configurable.
file_directory = '/Users/subhojit/Downloads/cifar-10-batches-py'
cdl = CIFAR_10_DataLoader()
# The loader hands back train/test features and labels as four separate arrays.
Xtrain, ytrain, Xtest, ytest = cdl.load_cifar_10_dataset(file_directory)

# Show the four shapes as a quick sanity check on the loaded data.
Xtrain.shape, ytrain.shape, Xtest.shape, ytest.shape

((50000, 3072), (50000,), (10000, 3072), (10000,))

In [20]:
# Number of distinct label values present in the training labels.
# This rebinds `num_classes`, which the import cell also brings in from
# convnet.before.two_layer_net.
num_classes = len(np.unique(ytrain))

{np.int64(0),
 np.int64(1),
 np.int64(2),
 np.int64(3),
 np.int64(4),
 np.int64(5),
 np.int64(6),
 np.int64(7),
 np.int64(8),
 np.int64(9)}

In [18]:
# Initialise the parameters of a two-layer fully connected network (small
# Gaussian weights, zero biases) and sample one mini-batch of training data.
np.random.seed(231)  # fixed seed so the weights and the sampled batch are repeatable
std_dev = 1e-2
W1 = np.random.randn(3072, 100) * std_dev
b1 = np.zeros(100)
W2 = np.random.randn(100, num_classes) * std_dev
# Size the output bias from num_classes so it always matches W2's output width
# (it used to be hard-coded to 10, independent of num_classes).
b2 = np.zeros(num_classes)

# Draw 32 row indices uniformly at random (repeats are possible) and gather
# the corresponding examples and labels.
ix = np.random.randint(0, Xtrain.shape[0], (32,))
Xb, Yb = Xtrain[ix], ytrain[ix]
Xb.shape, Yb.shape, W1.shape, b1.shape, W2.shape, b2.shape

((32, 3072), (32,), (3072, 100), (100,), (100, 10), (10,))

In [50]:
# Forward pass: two stacked affine maps applied to the mini-batch.
# NOTE(review): no nonlinearity is applied to h1 before the second layer, so the
# two layers compose into a single affine map — confirm this is intended.
h1 = Xb.dot(W1) + b1
h2 = h1.dot(W2) + b2
scores = h2  # class scores, one row per example in the batch
scores

array([[ 1.31230233e+01,  1.74482623e-01,  1.33671765e+00,
        -1.47911240e+01,  6.21281689e+00, -1.37099189e+01,
         6.54939581e+00,  5.68209542e+00, -3.82429304e+00,
        -1.76871127e+01],
       [ 1.51822980e+01,  1.98375737e+00, -2.84404575e+00,
        -9.23916105e+00,  5.63322379e+00, -1.25996341e+01,
         9.34274325e+00,  2.67062516e+00, -4.26869845e+00,
        -1.99972345e+01],
       [ 1.27177387e+01, -2.04675746e-01, -2.02816093e+00,
        -1.01519052e+01,  4.32840418e+00, -8.65080032e+00,
         9.73828094e+00,  2.27785687e+00, -5.20315709e+00,
        -1.60875826e+01],
       [-3.35302932e+00,  3.83990276e+00,  1.62621503e+00,
        -7.76524854e+00,  1.17294525e+00, -6.11047024e+00,
        -1.00842146e+00,  5.24322939e+00, -2.82681212e+00,
        -8.66830813e+00],
       [ 1.06113053e+01,  7.51724800e+00,  1.19214317e+00,
        -4.98004215e+00, -5.16569214e-01, -8.58518029e+00,
         2.71822104e-01, -2.61126700e-01,  5.40745237e+00,
        -1.

In [51]:
## SVM loss
def svm_loss(scores, y, delta=1.0):
    """Multiclass SVM (hinge) loss averaged over a batch.

    Args:
        scores: array of shape (num_examples, num_classes) holding class scores.
        y: integer array of shape (num_examples,) with the index of the correct
            class for each example.
        delta: required margin between the correct-class score and every other
            score. Defaults to 1.0, the value that was previously hard-coded.

    Returns:
        The sum of all hinge margins divided by the number of examples.
    """
    num_examples = scores.shape[0]
    rows = np.arange(num_examples)
    # Column vector of each example's correct-class score, so that it
    # broadcasts across that example's row of scores.
    correct_class_scores = scores[rows, y].reshape(-1, 1)
    margins = np.maximum(0, scores - correct_class_scores + delta)
    # The correct class must not contribute to its own loss.
    margins[rows, y] = 0
    return margins.sum() / num_examples


# Small hand-checkable example: 3 examples, 3 classes.
s = np.array([[1,3,1], [2,1,1], [3,10, 1]])
print(s)
y = np.array([1, 2, 0])  # correct class index for each row of s

# Per-row hinge sums are 0, 3 and 8, so the expected loss is 11 / 3.
svm_loss(s, y)


[[ 1  3  1]
 [ 2  1  1]
 [ 3 10  1]]


np.float64(3.6666666666666665)

In [52]:
# SVM loss of the freshly initialised network on the sampled mini-batch.
# NOTE(review): with all scores close to zero this loss would be roughly
# num_classes - 1; the recorded value is much larger, presumably because the
# inputs in Xb are not normalised — confirm.
loss_svm = svm_loss(scores, Yb)
loss_svm

np.float64(56.755346117301414)

In [56]:
# Softmax probabilities for the mini-batch, computed step by step.
# Subtract each row's maximum before exponentiating: the normalised
# probabilities are mathematically unchanged, but np.exp can no longer
# overflow when the raw scores are large.
shifted_scores = scores - np.max(scores, axis=1, keepdims=True)
# exp_scores now holds the exponentials of the shifted scores (each row's
# largest entry is exactly 1).
exp_scores = np.exp(shifted_scores)
row_sum_scores = np.sum(exp_scores, axis=1).reshape(-1, 1)
normalized_scores = exp_scores / row_sum_scores
exp_scores.shape, row_sum_scores.shape, normalized_scores.shape

((32, 10), (32, 1), (32, 10))

In [59]:
def softmax_loss(scores, y):
    """Softmax cross-entropy loss averaged over a batch.

    The log-probabilities are computed directly with the log-sum-exp trick
    instead of taking the log of the normalised probabilities. The result
    therefore stays finite even when the correct class's probability would
    underflow to 0.

    Args:
        scores: array of shape (num_examples, num_classes) holding class scores.
        y: integer array of shape (num_examples,) with the index of the correct
            class for each example.

    Returns:
        The mean negative log-probability of the correct classes.
    """
    num_examples = scores.shape[0]
    # Shift each row so its maximum is 0; softmax is invariant to this shift
    # and np.exp can then never overflow.
    shifted_scores = scores - np.max(scores, axis=1, keepdims=True)
    # log(sum(exp(.))) per row; the sum is at least 1, so the log is finite.
    log_norm = np.log(np.sum(np.exp(shifted_scores), axis=1, keepdims=True))
    log_probs = shifted_scores - log_norm
    correct_log_probs = -log_probs[np.arange(num_examples), y]
    loss = np.mean(correct_log_probs)
    return loss

# Softmax loss of the freshly initialised network on the sampled mini-batch.
# NOTE(review): near-uniform predictions would give about log(num_classes);
# the recorded value is much larger, presumably because the inputs in Xb are
# not normalised — confirm.
softmax_loss(scores, Yb)


np.float64(12.38209477589201)