In [2]:
import random
import numpy as np
import matplotlib.pyplot as plt
from cs231n.data_utils import load_CIFAR10
import time

# Render matplotlib figures inside the notebook output cells.
%matplotlib inline
# Plot defaults applied to every figure created later in this notebook.
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
# Show image pixels as-is ('nearest' interpolation) with a grayscale colormap.
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# Mode 2 asks the autoreload extension to reload all modules before each
# cell is executed.
%load_ext autoreload
%autoreload 2

In [23]:
class linear_svm(object):
    """Multiclass linear SVM (hinge) loss with a numerical gradient.

    Conventions shared by every method:
      W -- weight matrix, shape (D, C): one column per class
      X -- data matrix, shape (N, D): one row per example
      y -- integer class labels, shape (N,), each a valid column index of W

    The loss is the *sum* (not the mean) over all examples of the hinge
    margins ``max(0, s_j - s_y + delta)`` taken over every wrong class
    ``j``, plus ``np.linalg.norm(W)`` (the Frobenius norm for a 2-D ``W``)
    as the regularisation term.  The three ``compute_loss_*`` methods are
    alternative implementations of that same quantity.
    """

    def __init__(self):
        pass

    def compute_loss_noloop(self, W, X, y, delta=1.0):
        """Fully vectorised loss.

        W, X, y -- see the class docstring.
        delta   -- hinge margin (defaults to 1.0).

        Returns the scalar loss: summed hinge margins plus ``norm(W)``.
        """
        scores = X.dot(W)                         # (N, C) class scores
        rows = np.arange(X.shape[0])

        # Score of the correct class for every example, kept as a column so
        # that it broadcasts against the full (N, C) score matrix.
        correct = scores[rows, y][:, np.newaxis]

        margins = np.maximum(scores - correct + delta, 0)
        # The correct class would otherwise contribute `delta` per example.
        margins[rows, y] = 0

        return np.sum(margins) + np.linalg.norm(W)

    def compute_loss_oneloop(self, W, X, y, delta=1.0):
        """Loss with one Python loop over examples, vectorised over classes.

        Takes the same arguments and returns the same scalar as
        ``compute_loss_noloop``.
        """
        data_loss = 0.0

        for i in range(len(X)):
            scores = X[i].dot(W)                  # (C,) scores of example i
            margins = np.maximum(scores - scores[y[i]] + delta, 0)
            margins[y[i]] = 0                     # skip the correct class
            data_loss += np.sum(margins)

        return data_loss + np.linalg.norm(W)

    def compute_loss_twoloops(self, W, X, y, delta=1.0):
        """Loss with explicit Python loops over examples and over classes.

        Takes the same arguments and returns the same scalar as
        ``compute_loss_noloop``.
        """
        data_loss = 0.0

        for i in range(X.shape[0]):
            scores = X[i].dot(W)
            correct_score = scores[y[i]]

            for j in range(len(scores)):
                if j != y[i]:
                    data_loss += max(scores[j] - correct_score + delta, 0)

        return data_loss + np.linalg.norm(W)

    def num_grad(self, f, W, h=0.0001):
        """Forward-difference numerical gradient of ``f`` at ``W``.

        f -- callable taking the weight array and returning a scalar.
        W -- array at which to differentiate.  Each entry is perturbed in
             place and restored before the next one, so W is unchanged on
             return.  W should have a floating dtype: the small step ``h``
             would be lost when stored into an integer array.
        h -- step size (defaults to 0.0001).

        Returns an array shaped like W whose entry ``ix`` is
        ``(f(W + h * e_ix) - f(W)) / h``.
        """
        grad = np.zeros(W.shape)

        it = np.nditer(W, flags=['multi_index'], op_flags=['readwrite'])

        # W is restored after every perturbation, so the baseline f(W) is the
        # same for every entry: evaluate it once instead of once per weight.
        fx = f(W)

        while not it.finished:
            ix = it.multi_index
            old_val = W[ix]
            W[ix] = old_val + h
            fxh = f(W)
            W[ix] = old_val                       # undo the perturbation
            grad[ix] = (fxh - fx) / h
            it.iternext()

        return grad

    def loss_function(self, W):
        """Loss as a function of W alone, using the data stored on ``self``.

        Reads ``self.X`` and ``self.y``, which ``svm_loss_naive`` sets; this
        is the callable handed to ``num_grad``.
        """
        return self.compute_loss_noloop(W, self.X, self.y)

    def svm_loss_naive(self, W, X, y, eps):
        """Numerical gradient and loss of the SVM objective at ``W``.

        W, X, y -- see the class docstring.  X and y are also stored on the
                   instance as ``self.X`` / ``self.y``.
        eps     -- currently unused; kept so existing callers keep working.

        Returns ``(grad, loss)`` -- note the order: the gradient comes first.
        ``grad`` has the shape of W; ``loss`` is a scalar.
        """
        self.X = X
        self.y = y
        grad = self.num_grad(self.loss_function, W)
        return grad, self.compute_loss_oneloop(W, X, y)
    


            

In [4]:
# Directory holding the CIFAR-10 batch files, relative to the notebook's
# working directory.
cifar_dir = 'cs231n/datasets/cifar-10-batches-py'
# The loader returns the train/test data and label arrays; later cells
# overwrite these four names with subsampled and reshaped versions.
X_train, y_train, X_test, y_test = load_CIFAR10(cifar_dir)

# Sanity check: report the shape of each array as returned by the loader.
print('Training Data:', X_train.shape)
print('Training Labels:', y_train.shape)
print('Testing Data:', X_test.shape)
print('Testing Labels:', y_test.shape)

('Training Data:', (50000L, 32L, 32L, 3L))
('Training Labels:', (50000L,))
('Testing Data:', (10000L, 32L, 32L, 3L))
('Testing Labels:', (10000L,))


In [5]:
# Sizes of the splits carved out of the loaded arrays.
num_training = 49000
num_validation = 1000
num_test = 1000
num_dev = 100

# Slicing silently truncates when an array is too short, so check the sizes
# up front. This also catches an accidental second run of this cell, after
# X_train has already been cut down to num_training rows.
assert X_train.shape[0] >= num_training + num_validation, \
    'X_train has too few rows for the train/validation split'
assert X_test.shape[0] >= num_test, 'X_test has too few rows'

#Validation Set: the num_validation rows that follow the training rows.
# Contiguous ranges are taken with slices, which avoids building an index
# list and copying the data; the resulting splits cover disjoint rows.
X_val = X_train[num_training:num_training + num_validation]
y_val = y_train[num_training:num_training + num_validation]

#Training Set: the first num_training rows.
X_train = X_train[:num_training]
y_train = y_train[:num_training]

#Development Set: a small random subset of the training set (drawn without
# replacement; no seed is set, so it differs between runs).
mask = np.random.choice(num_training, num_dev, replace=False)
X_dev = X_train[mask]
y_dev = y_train[mask]

#Testing Set: the first num_test rows of the test data.
X_test = X_test[:num_test]
y_test = y_test[:num_test]

# Report what is actually being printed: the validation and dev data shapes.
print('Validation Data:', X_val.shape)
print('Development Data:', X_dev.shape)

# Flatten every example into a single row vector.
X_train = np.reshape(X_train, (X_train.shape[0],-1))
X_test = np.reshape(X_test, (X_test.shape[0],-1))
X_val = np.reshape(X_val, (X_val.shape[0],-1))
X_dev = np.reshape(X_dev, (X_dev.shape[0],-1))

('Testing Data:', (1000L, 32L, 32L, 3L))
('Testing Labels:', (100L, 32L, 32L, 3L))


In [6]:
# Per-feature mean, computed from the training split only; the same vector
# is used to centre every split.
mean_image = X_train.mean(axis=0)

# For each split: subtract the training mean in place, then append a column
# of ones (so a bias term can be folded into the weight matrix).
X_train -= mean_image
X_train = np.concatenate([X_train, np.ones((len(X_train), 1))], axis=1)

X_test -= mean_image
X_test = np.concatenate([X_test, np.ones((len(X_test), 1))], axis=1)

X_val -= mean_image
X_val = np.concatenate([X_val, np.ones((len(X_val), 1))], axis=1)

X_dev -= mean_image
X_dev = np.concatenate([X_dev, np.ones((len(X_dev), 1))], axis=1)

In [21]:
# Compare the three loss implementations on the full training set: they
# should print (nearly) the same value while taking different amounts of time.
print(X_train.shape, y_train.shape)

# Small random initial weights: one row per input feature, 10 columns.
# NOTE(review): no random seed is set, so W and every number printed below
# change from run to run.
W = np.random.randn(X_train.shape[1], 10)*0.0001

print(np.linalg.norm(W))
svm = linear_svm()
# Not used in this cell.
eps = 0.00005

# Each timestamp marks the end of one implementation and the start of the
# next, so the statement order below is what defines each measurement.
start = time.time()
loss = svm.compute_loss_oneloop(W, X_train, y_train)
end = time.time()
print("one loop takes ", end - start)
loss2 = svm.compute_loss_twoloops(W, X_train, y_train)
end2 = time.time()
print("Two loops take ", end2 - end)
loss3 = svm.compute_loss_noloop(W, X_train, y_train)
end3 = time.time()
print("No loop takes ", end3 - end2)


# The three labels are identical; the values are printed in the order
# one-loop, two-loop, no-loop.
print('Loss = ', loss)
print('Loss = ', loss2)
print('Loss = ', loss3)

((49000L, 3073L), (49000L,))
0.0174459601853
('one loop takes ', 1.0870001316070557)
('Two loops take ', 1.1949999332427979)
('No loop takes ', 0.11100006103515625)
('Loss = ', 447481.06533266651)
('Loss = ', 447481.06533266575)
('Loss = ', 447481.06533266732)


In [25]:
# Numerical gradient and loss on the small development set.  num_grad calls
# the loss function for every entry of W, so this is only practical on a
# small sample.  W and eps come from an earlier cell.
svm = linear_svm()
# svm_loss_naive returns the gradient first, then the loss.
grad, loss = svm.svm_loss_naive(W, X_dev, y_dev, eps)

# NOTE(review): this prints the whole gradient array together with the loss.
print(grad, loss)

(array([[ -1.40054719e+03,  -4.56853497e+03,  -8.10130200e+02, ...,
          1.82593646e+03,  -1.31919961e+03,  -7.33300231e+03],
       [ -1.99766710e+03,  -4.39238481e+03,   5.26135529e+02, ...,
          1.01021847e+03,  -2.08165128e+03,  -8.53425906e+03],
       [ -4.47951483e+03,  -4.29904701e+03,   1.90521157e+03, ...,
          2.01379118e+03,  -4.30200537e+03,  -8.57636688e+03],
       ..., 
       [ -3.56550719e+03,  -5.45989503e+03,  -2.82887353e+02, ...,
         -1.45349758e+03,   2.03061878e+03,  -4.75935136e+02],
       [ -4.99918812e+03,  -6.37360379e+03,   6.61802965e+02, ...,
          2.27613324e+02,   6.47408755e+02,  -7.83983764e+01],
       [  1.79975588e+01,  -2.09950813e+01,   8.00214470e+00, ...,
          1.10002676e+01,   1.90130896e+01,  -1.39989049e+01]]), 953.70393940650831)
