In [19]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
from dataset import load_svhn, random_split_train_val
from gradient_check import check_gradient
from metrics import multiclass_accuracy 
import linear_classifer

In [21]:
def prepare_for_linear_classifier(train_X, test_X):
    """
    Turn image arrays into design matrices for a linear classifier.

    Each sample is flattened to one row, converted to float and divided
    by 255.0. The per-feature mean of the *training* rows is subtracted
    from both sets, and a trailing column of ones is appended so the
    bias can be learned as an ordinary weight.

    Arguments:
      train_X: np array whose first axis indexes training samples
      test_X: np array whose first axis indexes test samples

    Returns:
      (train, test): two 2-D float arrays, each with
      (flattened feature count + 1) columns
    """
    def _flatten_and_scale(images):
        # One row per sample, values divided by 255.0
        return images.reshape(images.shape[0], -1).astype(float) / 255.0

    def _append_bias_column(features):
        # Extra all-ones column acts as the bias term
        ones_column = np.ones((features.shape[0], 1))
        return np.hstack([features, ones_column])

    train_rows = _flatten_and_scale(train_X)
    test_rows = _flatten_and_scale(test_X)

    # The mean comes from the training rows only and is reused for test
    train_mean = np.mean(train_rows, axis = 0)
    train_centered = train_rows - train_mean
    test_centered = test_rows - train_mean

    return _append_bias_column(train_centered), _append_bias_column(test_centered)

# Load SVHN from the local "data" directory; max_train / max_test presumably
# cap the number of samples returned (confirm against dataset.load_svhn).
train_X, train_y, test_X, test_y = load_svhn("data", max_train = 10000, max_test = 1000)
# Flatten, scale by 1/255, subtract the training mean and append a bias column of ones.
train_X, test_X = prepare_for_linear_classifier(train_X, test_X)
# Split train into train and val
# NOTE: train_X / train_y are rebound here, so after this cell they no longer
# refer to the full loaded training set; num_val = 1000 is presumably the
# number of samples moved into val_X / val_y.
train_X, train_y, val_X, val_y = random_split_train_val(train_X, train_y, num_val = 1000)

In [None]:
train_X.shape

In [22]:
# TODO: Implement check_gradient function in gradient_check.py
# All the functions below should pass the gradient check

def square(x):
    """
    Toy function for exercising check_gradient: sum of squared entries of x.

    Arguments:
      x: np array (or scalar) at which to evaluate the function

    Returns:
      value: Python float, sum of x*x (equals x*x for a single-element input)
      grad: 2*x, the analytic gradient, same shape as x
    """
    # float() applied directly to an array with ndim > 0 is deprecated since
    # NumPy 1.25 and is slated to become an error. Reducing with np.sum first
    # yields a scalar and gives the same value for single-element inputs.
    return float(np.sum(x * x)), 2 * x

check_gradient(square, np.array([3.0]))


(9000, 3073)

In [4]:
# TODO: Implement check_gradient function in gradient_check.py
# All the functions below should pass the gradient check

def square(x):
    """
    Toy function for exercising check_gradient: sum of squared entries of x.

    Arguments:
      x: np array (or scalar) at which to evaluate the function

    Returns:
      value: Python float, sum of x*x (equals x*x for a single-element input)
      grad: 2*x, the analytic gradient, same shape as x
    """
    # float() applied directly to an array with ndim > 0 is deprecated since
    # NumPy 1.25 and is slated to become an error. Reducing with np.sum first
    # yields a scalar and gives the same value for single-element inputs.
    return float(np.sum(x * x)), 2 * x

check_gradient(square, np.array([3.0]))


Gradient check passed!


True

In [5]:
def array_sum(x):
    """
    Toy function for check_gradient: sum of a length-2 vector.

    Arguments:
      x: np array of shape (2,)

    Returns:
      total: sum of the entries of x
      grad: array of ones shaped like x (the gradient of the sum)
    """
    expected_shape = (2,)
    assert x.shape == expected_shape, x.shape
    total = np.sum(x)
    unit_gradient = np.ones_like(x)
    return total, unit_gradient

check_gradient(array_sum, np.array([3.0, 2.0]))


Gradient check passed!


True

In [6]:
def array_2d_sum(x):
    """
    Toy function for check_gradient: sum of a 2x2 matrix.

    Arguments:
      x: np array of shape (2, 2)

    Returns:
      total: sum of all entries of x
      grad: array of ones shaped like x (the gradient of the sum)
    """
    n_rows, n_cols = 2, 2
    assert x.shape == (n_rows, n_cols)
    total = np.sum(x)
    unit_gradient = np.ones_like(x)
    return total, unit_gradient

check_gradient(array_2d_sum, np.array([[3.0, 2.0], [1.0, 0.0]]))

Gradient check passed!


True

In [7]:
# Evaluate softmax_with_cross_entropy on a 2x3 score array with targets [0, 0]
# (presumably rows are samples and columns are classes — confirm in linear_classifer).
# The returned loss / grad are not used further in this cell.
loss, grad = linear_classifer.softmax_with_cross_entropy(np.array([[0., 0., 0.],[1., 0., 0.]]), np.array([0, 0]))
# Run the gradient checker on the same scores and targets; its result is the cell's displayed value.
check_gradient(lambda x: linear_classifer.softmax_with_cross_entropy(x, np.array([0, 0])), np.array([[0., 0., 0.],[1., 0., 0.]], float))

Gradient check passed!


True

In [8]:
# Fix the NumPy global RNG seed so the random values below are reproducible.
np.random.seed(50)
num_classes = 4
batch_size = 1
# Integer scores drawn from [-1, 3) (randint's upper bound is exclusive), stored as floats.
predictions = np.random.randint(-1, 3, size=(batch_size, num_classes)).astype(float)
predictions

array([[-1., -1.,  2.,  0.]])

In [9]:
target_index = np.random.randint(0, num_classes, size=(batch_size, 1)).astype(int)
target_index

array([[1]])

In [10]:
check_gradient(lambda x: linear_classifer.softmax_with_cross_entropy(x, target_index), predictions)

Gradient check passed!


True

In [11]:
linear_classifer.softmax(predictions)

array([[0.04031637, 0.04031637, 0.80977599, 0.10959126]])

In [12]:
linear_classifer.softmax_with_cross_entropy(predictions, target_index)

(3.2109976232381756,
 array([[ 0.04031637, -0.95968363,  0.80977599,  0.10959126]]))

In [13]:
# Test a larger batch: batch_size = 10 with num_classes = 10
num_classes = 10
batch_size = 10
# Scores are 0.0 or 1.0 (randint's upper bound of 2 is exclusive).
predictions = np.random.randint(0, 2, size=(batch_size, num_classes)).astype(float)
predictions

array([[0., 0., 0., 1., 0., 0., 1., 1., 0., 1.],
       [0., 1., 1., 0., 0., 1., 0., 1., 0., 0.],
       [0., 0., 1., 1., 1., 1., 0., 1., 0., 0.],
       [1., 0., 1., 0., 1., 0., 1., 1., 1., 0.],
       [0., 1., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 1., 1., 0., 0., 1., 0., 0., 1.],
       [1., 0., 0., 1., 0., 0., 1., 1., 0., 0.],
       [0., 0., 1., 0., 1., 1., 0., 1., 1., 1.],
       [1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
       [1., 1., 0., 0., 1., 0., 0., 0., 0., 1.]])

In [14]:
target_index = np.random.randint(0, num_classes, size=(batch_size, 1)).astype(int)
target_index

array([[4],
       [6],
       [3],
       [0],
       [5],
       [8],
       [3],
       [6],
       [2],
       [8]])

In [15]:
check_gradient(lambda x: linear_classifer.softmax_with_cross_entropy(x, target_index), predictions)

Gradient check passed!


True

In [16]:
# Gradient check for linear_softmax with respect to the weight matrix W.
batch_size = 10
num_classes = 3
num_features = 4
# Fixed seed so W and X are reproducible.
np.random.seed(51)
# W is (num_features, num_classes) and X is (batch_size, num_features); both hold
# integers drawn from [-1, 3), stored as floats.
W = np.random.randint(-1, 3, size=(num_features, num_classes)).astype(float)
X = np.random.randint(-1, 3, size=(batch_size, num_features)).astype(float)
# Every sample gets target index 1.
target_index = np.ones(batch_size, dtype=int)

# loss and dW are not used further in this cell.
loss, dW = linear_classifer.linear_softmax(X, W, target_index)
# X and target_index are captured by the lambda; only the weights are passed to the checker.
check_gradient(lambda w: linear_classifer.linear_softmax(X, w, target_index), W)

Gradient check passed!


True

In [17]:
# TODO Implement l2_regularization function that implements loss for L2 regularization
# The return value of this first call is discarded (it is not the cell's last expression).
# 0.01 is presumably the regularization strength — confirm in linear_classifer.
linear_classifer.l2_regularization(W, 0.01)
# Gradient check of l2_regularization with respect to the weights, same 0.01 argument.
check_gradient(lambda w: linear_classifer.l2_regularization(w, 0.01), W)

Gradient check passed!


True

In [None]:
# TODO: Implement LinearSoftmaxClassifier.fit function
# Create a fresh classifier and fit it on the training split.
classifier = linear_classifer.LinearSoftmaxClassifier()
# fit's return value is stored as loss_history and reused by later cells (np.min, np.mean, plt.plot).
loss_history = classifier.fit(train_X, train_y, epochs=5000, learning_rate=0.0001, batch_size=300, reg=0.001)

Epoch 0, loss: 688.282826
Epoch 1, loss: 681.731860
Epoch 2, loss: 676.464461


In [None]:
probs = linear_classifer.softmax(np.array([1, -0.2, 1]))
linear_classifer.cross_entropy_loss(probs, 1)

In [None]:
np.min(loss_history)

In [None]:
np.mean(loss_history)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:
replica_of_W

In [None]:
loss_history = classifier.fit(train_X, train_y, epochs=5000, learning_rate=0.0002, batch_size=300, reg=0.001)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:

# let's look at the loss history!
plt.plot(loss_history)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:
loss_history = classifier.fit(train_X, train_y, epochs=1000, learning_rate=0.0001, batch_size=300, reg=0.001)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:
plt.plot(loss_history)

In [None]:
loss_history = classifier.fit(train_X, train_y, epochs=1000, learning_rate=0.0001, batch_size=300, reg=0.001)

In [None]:
loss_history = classifier.fit(train_X, train_y, epochs=1000, learning_rate=0.0001, batch_size=300, reg=0.001)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:
pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy: ", accuracy)

In [None]:
classifier = linear_classifer.LinearSoftmaxClassifier()
loss_history = classifier.fit(train_X, train_y, epochs=200, learning_rate=0.0001, batch_size=300, reg=0.001)

In [None]:
pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy: ", accuracy)

In [None]:
np.mean(loss_history)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:
replica_of_W

In [None]:
loss_history = classifier.fit(train_X, train_y, epochs=5000, learning_rate=0.0002, batch_size=300, reg=0.001)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:

# let's look at the loss history!
plt.plot(loss_history)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:
loss_history = classifier.fit(train_X, train_y, epochs=1000, learning_rate=0.0001, batch_size=300, reg=0.001)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:
plt.plot(loss_history)

In [None]:
loss_history = classifier.fit(train_X, train_y, epochs=1000, learning_rate=0.0001, batch_size=300, reg=0.001)

In [None]:
loss_history = classifier.fit(train_X, train_y, epochs=1000, learning_rate=0.0001, batch_size=300, reg=0.001)

In [None]:
replica_of_W = classifier.copy_W()

In [None]:
pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy: ", accuracy)

In [None]:
classifier = linear_classifer.LinearSoftmaxClassifier()
loss_history = classifier.fit(train_X, train_y, epochs=200, learning_rate=0.0001, batch_size=300, reg=0.001)

In [None]:
pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy: ", accuracy)