In [0]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as io
import os

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [0]:
from dataset import load_svhn, random_split_train_val
from gradient_check import check_gradient
from metrics import multiclass_accuracy

In [0]:
def prepare_for_linear_classifier(train_X, test_X):
    train_flat = train_X.reshape(train_X.shape[0], -1).astype(np.float) / 255.0
    test_flat = test_X.reshape(test_X.shape[0], -1).astype(np.float) / 255.0
    
    # Subtract mean
    mean_image = np.mean(train_flat, axis = 0)
    train_flat -= mean_image
    test_flat -= mean_image
    
    # Add another channel with ones as a bias term
    train_flat_with_ones = np.hstack([train_flat, np.ones((train_X.shape[0], 1))])
    test_flat_with_ones = np.hstack([test_flat, np.ones((test_X.shape[0], 1))])    
    return train_flat_with_ones, test_flat_with_ones
    
train_X, train_y, test_X, test_y = load_svhn("data", max_train=10000, max_test=1000)    
train_X, test_X = prepare_for_linear_classifier(train_X, test_X)
# Split train into train and val
train_X, train_y, val_X, val_y = random_split_train_val(train_X, train_y, num_val = 1000)

Gradient check: 

![image](https://wikimedia.org/api/rest_v1/media/math/render/svg/22fc2c0a66c63560a349604f8b6b39221566236d)



In [0]:
def softmax(preds):
    dim = preds.ndim
    if dim == 1:
        c = np.max(preds)
        return np.exp(preds - c) / np.sum(np.exp(preds - c))
    sm = []
    log = 0
    for p in preds:
        c = np.max(p)
        sm.append(np.exp(p - c) / np.sum(np.exp(p - c)))
    return np.stack(sm, axis=0)


def cross_entropy_loss(predictions, targets):
    if predictions.ndim == 1:
        return -np.log(predictions[targets])
    N = predictions.shape[0]
    ce = (-np.sum(np.log(predictions[np.arange(N), targets.flatten()]))) / N
    return ce


def softmax_with_cross_entropy(predictions, target_index):
    ndim = predictions.ndim
    sm = softmax(predictions)
    loss = cross_entropy_loss(sm, target_index)
    dprediction = np.zeros_like(sm)
    if ndim == 1:
        dprediction[target_index] = 1
    else:
        dprediction[np.arange(dprediction.shape[0]),
                    target_index.flatten()] = 1
    dprediction = sm - dprediction
    return loss, dprediction


In [0]:
# TODO Implement combined function or softmax and cross entropy and produce gradient
loss, grad = softmax_with_cross_entropy(np.array([1, 0, 0]), 1)
check_gradient(lambda x: softmax_with_cross_entropy(x, 1), np.array([1, 0, 0], np.float))

0.5761168847658291 0.5761168847651099
-0.7880584423829146 -0.7880584423691771
0.21194155761708544 0.2119415576151695
Gradient check passed!


True

In [0]:
# TODO Extend combined function so it can receive a 2d array with batch of samples
# Test batch_size = 1
num_classes = 4
batch_size = 1
predictions = np.random.randint(-1, 3, size=(batch_size, num_classes)).astype(np.float)
target_index = np.random.randint(0, num_classes, size=(batch_size, 1)).astype(np.int)
check_gradient(lambda x: softmax_with_cross_entropy(x, target_index), predictions)

# Test batch_size = 3
num_classes = 4
batch_size = 3
predictions = np.random.randint(-1, 3, size=(batch_size, num_classes)).astype(np.float)
target_index = np.random.randint(0, num_classes, size=(batch_size, 1)).astype(np.int)
check_gradient(lambda x: softmax_with_cross_entropy(x, target_index), predictions)

# Make sure maximum subtraction for numberic stability is done separately for every sample in the batch
probs = softmax(np.array([[20,0,0], [1000, 0, 0]]))
assert np.all(np.isclose(probs[:, 0], 1.0))

0.05406459218899647 0.05406459219203796
0.39948630465030277 0.39948630465858054
0.39948630465030277 0.3994863046474783
-0.853037201489602 -0.8530372014869946
Gradient check passed!
0.08714431874203257 0.08714431878331651
0.6439142598879724 0.6439142599123926
0.23688281808991013 0.23688281807210385
-0.967941396719915 -0.967941396723404
0.13447071068499755 0.13447071069982997
0.13447071068499755 0.13447071069982997
0.36552928931500245 0.3655292892812411
-0.6344707106849976 -0.6344707106809011
-0.6005136953496972 -0.6005136953479706
0.39948630465030277 0.39948630465858054
0.14696279851039795 0.14696279850845428
0.05406459218899647 0.054064592180935726
Gradient check passed!


In [0]:
def linear_softmax(X, W, target_index):
    '''
    Performs linear classification and returns loss and gradient over W

    Arguments:
      X, np array, shape (num_batch, num_features) - batch of images
      W, np array, shape (num_features, classes) - weights
      target_index, np array, shape (num_batch) - index of target classes

    Returns:
      loss, single value - cross-entropy loss
      gradient, np.array same shape as W - gradient of weight by loss

    '''
    m = X.shape[0]
    predictions = np.dot(X, W)
    loss, grad = softmax_with_cross_entropy(predictions, target_index)
    dW = np.dot(X.T, grad)
    return loss, dW

In [0]:
# TODO Implement linear_softmax function that uses softmax with cross-entropy for linear classifier
batch_size = 2
num_classes = 2
num_features = 3
np.random.seed(42)
W = np.random.randint(-1, 3, size=(num_features, num_classes)).astype(np.float)
print(W)
X = np.random.randint(-1, 3, size=(batch_size, num_features)).astype(np.float)
print(X)
target_index = np.ones(batch_size, dtype=np.int)
loss, dW = linear_softmax(X, W, target_index)
check_gradient(lambda w: linear_softmax(X, w, target_index), W)

[[ 1.  2.]
 [-1.  1.]
 [ 1.  2.]]
[[-1. -1.  1.]
 [ 0.  1.  1.]]
-0.8807970779778823 -0.8807970779844964
0.8807970779778824 0.8807970779844964
-0.8333712048003156 -0.8333712048225194
0.8333712048003158 0.8333712048225194
0.9282229511554491 0.9282229511464734
-0.9282229511554491 -0.9282229511464734
Gradient check passed!


True

In [0]:
class LinearSoftmaxClassifier():
    def __init__(self):
        self.W = None

    def fit(self, X, y, batch_size=5, learning_rate=1e-7, reg=1e-5,
            epochs=1):
        '''
        Trains linear classifier
        
        Arguments:
          X, np array (num_samples, num_features) - training data
          y, np array of int (num_samples) - labels
          batch_size, int - batch size to use
          learning_rate, float - learning rate for gradient descent
          reg, float - L2 regularization strength
          epochs, int - number of epochs
        '''

        num_train = X.shape[0]
        num_features = X.shape[1]
        num_classes = np.max(y)+1
        if self.W is None:
            self.W = 0.001 * np.random.randn(num_features, num_classes)

        loss_history = []
        for epoch in range(epochs):
            shuffled_indices = np.arange(num_train)
            np.random.shuffle(shuffled_indices)
            sections = np.arange(batch_size, num_train, batch_size)
            batches_indices = np.array_split(shuffled_indices, sections)
            epoch_loss = []
            for i in range(len(batches_indices)):
                # X
                batch = X[batches_indices[i]]
                # Y
                target = y[batches_indices[i]]
                loss, dW = linear_softmax(batch, self.W, target)
                # add l2 regularization
                loss += reg * np.sum(np.square(self.W))
                dW += 2 * reg * self.W
                # weight update
                self.W -= learning_rate * dW
                
                epoch_loss.append(loss)
            epoch_loss = np.mean(epoch_loss)
        
            #end
            print("Epoch %i, loss: %f" % (epoch, epoch_loss))
            loss_history.append(epoch_loss)

        return loss_history

    def predict(self, X):
        '''
        Produces classifier predictions on the set
       
        Arguments:
          X, np array (test_samples, num_features)

        Returns:
          y_pred, np.array of int (test_samples)
        '''
        y_pred = np.dot(X, self.W).argmax(axis=1)
        return y_pred

In [0]:
# Let's check how it performs on validation set
classifier = LinearSoftmaxClassifier()

pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy: ", accuracy)

# Now, let's train more and see if it performs better
classifier.fit(train_X, train_y, epochs=100, learning_rate=1e-4, batch_size=300, reg=1e-6)
pred = classifier.predict(val_X)
accuracy = multiclass_accuracy(pred, val_y)
print("Accuracy after training for 100 epochs: ", accuracy)

Accuracy:  0.237
Epoch 0, loss: 2.175519
Epoch 1, loss: 2.171229
Epoch 2, loss: 2.166625
Epoch 3, loss: 2.163731
Epoch 4, loss: 2.160160
Epoch 5, loss: 2.157405
Epoch 6, loss: 2.154834
Epoch 7, loss: 2.151925
Epoch 8, loss: 2.149470
Epoch 9, loss: 2.147739
Epoch 10, loss: 2.145332
Epoch 11, loss: 2.144307
Epoch 12, loss: 2.141439
Epoch 13, loss: 2.139962
Epoch 14, loss: 2.138375
Epoch 15, loss: 2.137155
Epoch 16, loss: 2.135265
Epoch 17, loss: 2.133773
Epoch 18, loss: 2.132158
Epoch 19, loss: 2.131099
Epoch 20, loss: 2.130079
Epoch 21, loss: 2.128374
Epoch 22, loss: 2.126958
Epoch 23, loss: 2.126249
Epoch 24, loss: 2.124995
Epoch 25, loss: 2.124023
Epoch 26, loss: 2.123141
Epoch 27, loss: 2.121611
Epoch 28, loss: 2.121093
Epoch 29, loss: 2.119966
Epoch 30, loss: 2.119203
Epoch 31, loss: 2.117631
Epoch 32, loss: 2.117739
Epoch 33, loss: 2.117146
Epoch 34, loss: 2.115582
Epoch 35, loss: 2.115289
Epoch 36, loss: 2.114254
Epoch 37, loss: 2.114077
Epoch 38, loss: 2.113000
Epoch 39, loss: 2.

In [0]:
num_epochs = 200
batch_size = 300

learning_rates = [1e-4, 1e-5, 1e-7]
reg_strengths = [1e-3, 1e-4, 1e-5, 1e-6]

best_classifier = None
best_val_accuracy = 0
best_history = None

for lr in learning_rates:
    for reg in reg_strengths:
        classifier = LinearSoftmaxClassifier()
        loss_history = classifier.fit(train_X, 
                                      train_y, 
                                      epochs=num_epochs, 
                                      learning_rate=lr, 
                                      batch_size=batch_size, 
                                      reg=reg)
        
        pred = classifier.predict(val_X)
        accuracy = multiclass_accuracy(pred, val_y)
        
        if accuracy > best_val_accuracy:
            best_classifier = classifier
            best_val_accuracy = accuracy
            best_history = loss_history
        
        print('current accuracy = ', accuracy)
        print('----------------------------------')

print('best validation accuracy achieved: %f' % best_val_accuracy)

Epoch 0, loss: 2.293854
Epoch 1, loss: 2.272100
Epoch 2, loss: 2.255290
Epoch 3, loss: 2.239571
Epoch 4, loss: 2.228289
Epoch 5, loss: 2.217225
Epoch 6, loss: 2.208166
Epoch 7, loss: 2.200153
Epoch 8, loss: 2.193511
Epoch 9, loss: 2.187995
Epoch 10, loss: 2.183011
Epoch 11, loss: 2.178604
Epoch 12, loss: 2.175043
Epoch 13, loss: 2.170544
Epoch 14, loss: 2.167460
Epoch 15, loss: 2.165400
Epoch 16, loss: 2.161914
Epoch 17, loss: 2.158938
Epoch 18, loss: 2.157094
Epoch 19, loss: 2.154335
Epoch 20, loss: 2.152889
Epoch 21, loss: 2.150565
Epoch 22, loss: 2.149226
Epoch 23, loss: 2.147881
Epoch 24, loss: 2.146073
Epoch 25, loss: 2.144064
Epoch 26, loss: 2.143192
Epoch 27, loss: 2.141091
Epoch 28, loss: 2.139878
Epoch 29, loss: 2.138554
Epoch 30, loss: 2.137349
Epoch 31, loss: 2.135893
Epoch 32, loss: 2.135411
Epoch 33, loss: 2.134291
Epoch 34, loss: 2.132688
Epoch 35, loss: 2.131586
Epoch 36, loss: 2.129657
Epoch 37, loss: 2.129557
Epoch 38, loss: 2.128449
Epoch 39, loss: 2.128033
Epoch 40, 

Epoch 122, loss: 2.079671
Epoch 123, loss: 2.079260
Epoch 124, loss: 2.078618
Epoch 125, loss: 2.079209
Epoch 126, loss: 2.078618
Epoch 127, loss: 2.077897
Epoch 128, loss: 2.077843
Epoch 129, loss: 2.077348
Epoch 130, loss: 2.077016
Epoch 131, loss: 2.077017
Epoch 132, loss: 2.076006
Epoch 133, loss: 2.075771
Epoch 134, loss: 2.075456
Epoch 135, loss: 2.075729
Epoch 136, loss: 2.074661
Epoch 137, loss: 2.074817
Epoch 138, loss: 2.074619
Epoch 139, loss: 2.074422
Epoch 140, loss: 2.073325
Epoch 141, loss: 2.073955
Epoch 142, loss: 2.073770
Epoch 143, loss: 2.072536
Epoch 144, loss: 2.073247
Epoch 145, loss: 2.072579
Epoch 146, loss: 2.072426
Epoch 147, loss: 2.072740
Epoch 148, loss: 2.071488
Epoch 149, loss: 2.071172
Epoch 150, loss: 2.071733
Epoch 151, loss: 2.070808
Epoch 152, loss: 2.070386
Epoch 153, loss: 2.070359
Epoch 154, loss: 2.070469
Epoch 155, loss: 2.070027
Epoch 156, loss: 2.069809
Epoch 157, loss: 2.069105
Epoch 158, loss: 2.069214
Epoch 159, loss: 2.068495
Epoch 160, l

Epoch 39, loss: 2.123329
Epoch 40, loss: 2.122492
Epoch 41, loss: 2.121585
Epoch 42, loss: 2.120429
Epoch 43, loss: 2.119983
Epoch 44, loss: 2.118622
Epoch 45, loss: 2.118080
Epoch 46, loss: 2.116547
Epoch 47, loss: 2.115850
Epoch 48, loss: 2.115068
Epoch 49, loss: 2.114527
Epoch 50, loss: 2.113950
Epoch 51, loss: 2.112290
Epoch 52, loss: 2.112406
Epoch 53, loss: 2.111567
Epoch 54, loss: 2.111260
Epoch 55, loss: 2.110009
Epoch 56, loss: 2.109131
Epoch 57, loss: 2.108365
Epoch 58, loss: 2.107406
Epoch 59, loss: 2.107240
Epoch 60, loss: 2.106511
Epoch 61, loss: 2.105605
Epoch 62, loss: 2.105160
Epoch 63, loss: 2.104404
Epoch 64, loss: 2.103995
Epoch 65, loss: 2.103418
Epoch 66, loss: 2.103171
Epoch 67, loss: 2.101981
Epoch 68, loss: 2.101668
Epoch 69, loss: 2.100600
Epoch 70, loss: 2.100658
Epoch 71, loss: 2.099856
Epoch 72, loss: 2.099580
Epoch 73, loss: 2.097831
Epoch 74, loss: 2.097818
Epoch 75, loss: 2.097955
Epoch 76, loss: 2.096912
Epoch 77, loss: 2.096175
Epoch 78, loss: 2.095771


Epoch 159, loss: 2.159740
Epoch 160, loss: 2.159407
Epoch 161, loss: 2.159162
Epoch 162, loss: 2.158901
Epoch 163, loss: 2.158631
Epoch 164, loss: 2.158349
Epoch 165, loss: 2.158098
Epoch 166, loss: 2.157851
Epoch 167, loss: 2.157563
Epoch 168, loss: 2.157322
Epoch 169, loss: 2.157081
Epoch 170, loss: 2.156805
Epoch 171, loss: 2.156544
Epoch 172, loss: 2.156279
Epoch 173, loss: 2.156061
Epoch 174, loss: 2.155849
Epoch 175, loss: 2.155621
Epoch 176, loss: 2.155343
Epoch 177, loss: 2.155108
Epoch 178, loss: 2.154892
Epoch 179, loss: 2.154716
Epoch 180, loss: 2.154400
Epoch 181, loss: 2.154119
Epoch 182, loss: 2.153912
Epoch 183, loss: 2.153752
Epoch 184, loss: 2.153492
Epoch 185, loss: 2.153253
Epoch 186, loss: 2.153029
Epoch 187, loss: 2.152761
Epoch 188, loss: 2.152579
Epoch 189, loss: 2.152371
Epoch 190, loss: 2.152139
Epoch 191, loss: 2.151916
Epoch 192, loss: 2.151715
Epoch 193, loss: 2.151510
Epoch 194, loss: 2.151262
Epoch 195, loss: 2.151064
Epoch 196, loss: 2.150859
Epoch 197, l

KeyboardInterrupt: 

In [0]:
print('best validation accuracy achieved: %f' % best_val_accuracy)

best validation accuracy achieved: 0.251000


In [0]:
test_pred = best_classifier.predict(test_X)
test_accuracy = multiclass_accuracy(test_pred, test_y)
print('Linear softmax classifier test set accuracy: %f' % (test_accuracy, ))

Linear softmax classifier test set accuracy: 0.208000
