In [1]:
import numpy as np

**Loading the dataset**
We use the *pickle* library to read the data.

In [2]:
def load_data(filename):
    import pickle
    with open(filename, 'rb') as fo:
        dataset_dict = pickle.load(fo, encoding='bytes')
        X_data = np.transpose(dataset_dict[b'data'])
        Y_labels = dataset_dict[b'labels']
        return (X_data, Y_labels)

In [3]:
def data_loss(x, W, y):
    '''Computes the SVM loss using a linear classifier.
    W --> A (10x3073) Weight Matrix including the bias term
    x --> A matrix in which each image of the training set is flattened out as a column vector. 
        Hence, if the dataset has 10,000 images, x is a (3073 x 10000) matrix
    y --> Index of true class label values between 0 to 9 representing each of the 10 classes of CIFAR-10. This will be a 
        (1x10000) row vector'''
    
    # Computing the scores
    scores = np.dot(W, x) # scores will be a (10x10000) matrix representing score for each class for each training example.
    
    for i in range(10000):
        scores[:, i] = scores[:, i] - scores[y[i], i] + 1

    loss_i = np.sum(np.maximum(0,scores)/10000)
    return loss_i

In [4]:
def regularization_loss(W):
    return np.sum(np.square(W))

Loading the first batch and exploring the dataset.

In [5]:
data = unpickle('cifar-10-batches-py/data_batch_1')
print(data.keys())
print('Total number of labels: ', len(data[b'labels']))
print('Datatype of labels: ', type(data[b'labels']))
print('First five labels:', data[b'labels'][:5])
print('\n')

print(len(data[b'data']))
print(type(data[b'data']))
print(data[b'data'].shape)
print(data[b'data'][:5])

dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
Total number of labels:  10000
Datatype of labels:  <class 'list'>
First five labels: [6, 9, 9, 4, 1]


10000
<class 'numpy.ndarray'>
(10000, 3072)
[[ 59  43  50 ... 140  84  72]
 [154 126 105 ... 139 142 144]
 [255 253 253 ...  83  83  84]
 [ 28  37  38 ...  28  37  46]
 [170 168 177 ...  82  78  80]]


The training dataset batch is a (10000 x 3072) matrix.
However, we wish to represent each training example stretched out as a column vector. Hence, transpose operation is to be done
to obtain (3072 x 10000) matrix

In [6]:
X = data[b'data'] # X is 10000 x 3072
X = np.transpose(X)

To accomodate the bias term additonal row of all ones to be added at the end of the matrix making it (3073 x 10000) dimensional

In [7]:
X = np.vstack([X, np.ones(X.shape[1])])

Creating the weight matrix W of (10 x 3073), where each row acts as a template for each class of the CIFAR-10.

In [8]:
W = np.random.randn(10, 3073) * 0.001

 Training labels y as a 10000-dimensional row vector

In [9]:
y = data[b'labels']

Computing the loss

In [10]:
best_loss = float('inf')
best_W = np.ones((10,3073), dtype=float)

for i in range(1000):
    W = np.random.randn(10, 3073) * 0.001
    loss = data_loss(X, W, y) + regularization_loss(W)
    print(i, 'Loss:', loss)
    if loss < best_loss:
        best_loss = loss
        best_W = W

0 Loss: 45.26459919583429
1 Loss: 36.809760059592634
2 Loss: 50.719038817769764
3 Loss: 36.0554557935068
4 Loss: 44.25722551542478
5 Loss: 42.99006987370996
6 Loss: 47.10477702662925
7 Loss: 45.082159153334956
8 Loss: 48.71218673809068
9 Loss: 46.97472962877628
10 Loss: 49.79688801460596
11 Loss: 53.1553709322582
12 Loss: 34.20288396090927
13 Loss: 61.481072202818176
14 Loss: 40.00774609382795
15 Loss: 44.422747615778405
16 Loss: 54.73400421603473
17 Loss: 42.46193228699769
18 Loss: 43.68399145629166
19 Loss: 44.42200972499074
20 Loss: 50.15604293622954
21 Loss: 53.905616279575696
22 Loss: 43.928668782360766
23 Loss: 58.521936936618175
24 Loss: 31.37023053337608
25 Loss: 33.27978360397904
26 Loss: 50.41227732260022
27 Loss: 41.76927929448295
28 Loss: 33.69176474887338
29 Loss: 40.983206443846875
30 Loss: 41.87715232053408
31 Loss: 40.716457969592206
32 Loss: 41.50556192353702
33 Loss: 42.1177717855226
34 Loss: 31.432427755535976
35 Loss: 48.042090868673114
36 Loss: 47.24697999441056
37

294 Loss: 41.26725278378266
295 Loss: 45.1403824669564
296 Loss: 34.02349827215166
297 Loss: 37.223766956880745
298 Loss: 38.464380137842554
299 Loss: 54.96286989142184
300 Loss: 48.49584147633669
301 Loss: 43.24386034010447
302 Loss: 37.2116071292942
303 Loss: 49.11725527114768
304 Loss: 47.348020551359156
305 Loss: 39.01638714307707
306 Loss: 38.6546674794338
307 Loss: 49.830226199913525
308 Loss: 38.43754762332264
309 Loss: 69.42933838409527
310 Loss: 34.31574306915691
311 Loss: 38.12532368614479
312 Loss: 46.528535530941866
313 Loss: 67.14795426947836
314 Loss: 47.08714496851665
315 Loss: 35.916003318507954
316 Loss: 55.347968923077204
317 Loss: 46.33479781443834
318 Loss: 42.25409180681292
319 Loss: 46.039710648855525
320 Loss: 47.5647493597257
321 Loss: 44.01984707476972
322 Loss: 28.47587838583846
323 Loss: 37.71344521410017
324 Loss: 51.327353718268014
325 Loss: 55.29100787238037
326 Loss: 31.3288028322562
327 Loss: 34.12384283749561
328 Loss: 33.32234064175098
329 Loss: 44.117

586 Loss: 39.77359801605728
587 Loss: 36.7188201180416
588 Loss: 39.770469448458336
589 Loss: 42.66820565818812
590 Loss: 42.242783110243714
591 Loss: 39.017309141667084
592 Loss: 50.95319618905971
593 Loss: 29.835840731977576
594 Loss: 36.30137364278305
595 Loss: 38.18530350276503
596 Loss: 51.51715357284396
597 Loss: 46.04838179449836
598 Loss: 36.00520694259885
599 Loss: 33.4574349325123
600 Loss: 23.46096614564726
601 Loss: 42.51082047072124
602 Loss: 42.36527447999569
603 Loss: 36.32329246411005
604 Loss: 39.62477258510619
605 Loss: 55.78723807881207
606 Loss: 33.117509652034556
607 Loss: 46.41812521847356
608 Loss: 58.763686669504786
609 Loss: 52.80651782026603
610 Loss: 34.76318308481876
611 Loss: 49.828922488837556
612 Loss: 55.44077711685604
613 Loss: 54.19700532773398
614 Loss: 32.27011602750772
615 Loss: 43.08816180866899
616 Loss: 50.230239023045094
617 Loss: 44.60953517556348
618 Loss: 56.91376350042575
619 Loss: 42.694612150398484
620 Loss: 35.23244048877166
621 Loss: 40.

877 Loss: 42.38130135486677
878 Loss: 44.22157796578736
879 Loss: 37.21331509067989
880 Loss: 49.29105835571738
881 Loss: 41.70111818046965
882 Loss: 41.87522450244825
883 Loss: 42.30138885798269
884 Loss: 38.126539533534206
885 Loss: 45.1471288021888
886 Loss: 31.565614662349045
887 Loss: 37.69477762946606
888 Loss: 51.70179021250077
889 Loss: 39.366312644390874
890 Loss: 45.66326340026594
891 Loss: 35.66772824038136
892 Loss: 43.15801285042846
893 Loss: 32.84459377080249
894 Loss: 51.23228724925695
895 Loss: 35.210701759268616
896 Loss: 38.832606637202595
897 Loss: 44.17317877739023
898 Loss: 49.24705645124881
899 Loss: 50.20363001446702
900 Loss: 36.026880713945125
901 Loss: 48.141373571809204
902 Loss: 45.07938302753366
903 Loss: 36.53798797240261
904 Loss: 34.53345251932512
905 Loss: 52.237851284629855
906 Loss: 58.28555207105665
907 Loss: 45.99343854486418
908 Loss: 44.448549208425014
909 Loss: 36.77688273642949
910 Loss: 39.44600186467382
911 Loss: 49.06441958330001
912 Loss: 62

In [11]:
print(best_loss)

23.46096614564726


In [12]:
print(best_W)

[[ 1.09802504e-03 -7.32156265e-04 -1.68089190e-03 ... -7.75376244e-05
   9.21506639e-04  6.78315018e-04]
 [ 1.44448251e-04  9.03680981e-04 -5.94457438e-04 ... -5.25525107e-05
   1.75335596e-03  1.59935099e-03]
 [-1.89200156e-03 -8.04716456e-04 -1.52596309e-04 ... -1.19218818e-03
   1.18762677e-03 -9.04664499e-04]
 ...
 [-6.28600380e-04 -5.43409850e-04 -7.64967335e-04 ... -6.05915917e-04
   6.06442771e-04  5.69309105e-05]
 [-3.96316072e-04  1.30343416e-03 -1.53782166e-03 ...  7.91045721e-04
   5.24094029e-04 -1.71899057e-04]
 [ 6.46770007e-04 -5.98553191e-04 -4.09025102e-04 ... -6.31533507e-04
   5.08034021e-04  2.14444724e-03]]


In [13]:
# testing back on training data
scores = np.dot(best_W, X)
print(scores.shape)
pred_y = np.argmax(scores, axis=0)
accuracy = np.mean(pred_y == y)
print(accuracy)

(10, 10000)
0.0948


In [None]:
len(data[b'labels'])

In [None]:
temp = np.array([[1,2,3,4],[10,20,30,40], [100,200,300,400]])
print(temp)
print('\n\n')

b = [1,2,0,1]
for i in range(4):
    temp[:, i] = temp[:, i] - temp[b[i], i]
print(temp)
print('\n\n')
print(np.maximum(0, temp))

print('\n\n')
print(np.sum(np.maximum(0, temp)))

In [None]:
temp

In [None]:
temp = np.vstack([temp, np.ones(temp.shape[1])])
temp

In [None]:
np.argmax(temp, axis=0)