In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
plt.style.use('seaborn-whitegrid')
%matplotlib inline

In [24]:
# Generate artificial data with 3 samples, 4 features per sample
# and 3 output classes
num_samples = 3 # number of samples
num_features = 4 # number of features (a.k.a. dimensionality)
num_labels = 3 # number of output labels
# Data matrix (each column = single sample)
X = np.random.choice(np.arange(0, 5), size = (num_features, num_samples), replace = True)
# Class labels
y = np.random.choice([0, 1, 2], size = num_samples, replace = True)
# Weight matrix
W = np.random.choice(np.arange(-2, 3), size = (num_labels, num_features), replace = True)
print('X = ')
print(X)
print('y = ')
print(y)
print('W = ')
print(W)

X = 
[[4 1 2]
 [2 1 4]
 [0 2 4]
 [1 1 2]]
y = 
[1 1 0]
W = 
[[ 0 -1  0 -2]
 [ 0  0  2 -2]
 [ 1  1  2 -1]]


In [25]:
# Calculate the scores matrix
S = np.dot(W, X)
print('S = ')
print(S)

S = 
[[-4 -3 -8]
 [-2  2  4]
 [ 5  5 12]]


In [26]:
# Correct category score for each sample
print('y = ')
print(y)
print('S = ')
print(S[y, np.arange(num_samples)])

y = 
[1 1 0]
S = 
[-2  2 -8]


In [27]:
# Compute the margins matrix where in each column corresponds
# to the corresponding column from the scores matrix with the
# correct category score subtracted and a one (delta) added. This process is
# done only for the incorrect category scores in each column.
delta = 1 # SVM parameter
M = S - S[y, np.arange(num_samples)] + delta
print('S = ')
print(S)
print('y = ')
print(y)
print('M = ')
print(M)
# Set margin for correct category in each column equal to 0
M[y, np.arange(num_samples)] = 0
print('M = ')
print(M)

S = 
[[-4 -3 -8]
 [-2  2  4]
 [ 5  5 12]]
y = 
[1 1 0]
M = 
[[-1 -4  1]
 [ 1  1 13]
 [ 8  4 21]]
M = 
[[-1 -4  0]
 [ 0  0 13]
 [ 8  4 21]]


In [28]:
# SVM loss calculation (for each sample, it's simply the sum of the elements in
# each column of margins derived in the previous step followed by a max-check 
# w.r.t. 0
print('Sum of each column of margins matrix = ')
print(np.sum(M, axis = 0))
loss = np.maximum(0, np.sum(M, axis = 0))
print('Loss for each sample = ')
print(loss)
# Average loss across all samples
loss = np.mean(loss)
print('Average loss = ')
print(loss)

Sum of each column of margins matrix = 
[ 7  0 34]
Loss for each sample = 
[ 7  0 34]
Average loss = 
13.666666666666666


In [29]:
# Print categories
print('Correct categories = ')
print(y)

# Print margins matrix
print('M = ')
print(M)

# Non-negative entries in margins matrix
print('Non-negative entries in margins matrix = ')
print((M > 0).astype(int))

# Number of non-negative entries in each column of margins matrix
print('Number of non-negative entries in each column of margins matrix = ')
np.sum((M > 0).astype(int), axis = 0)

Correct categories = 
[1 1 0]
M = 
[[-1 -4  0]
 [ 0  0 13]
 [ 8  4 21]]
Non-negative entries in margins matrix = 
[[0 0 0]
 [0 0 1]
 [1 1 1]]
Number of non-negative entries in each column of margins matrix = 


array([1, 1, 2])

In [30]:
# Adjust margins matrix such that in each column the correct category row has an 
# entry equal to -1 times number of margins greater than 0 in that column and 
# incorrect category rows each have 1 if their margins are greater than 0.

print('Correct categories = ')
print(y)

print('Margins matrix = ')
print(M)

# Update margins matrix to represent presence of non-negative entries
M = (M > 0).astype(int)

print('Non-negative entries in margins matrix = ')
print(M)

# Adjust correct category row in each column
M[y, np.arange(num_samples)] = -np.sum(M, axis = 0)

# Adjust incorrect category rows in each column
# Actually, nothing to be done here as the entries are already 0/1
print('Adjusted margins matrix = ')
print(M)

Correct categories = 
[1 1 0]
Margins matrix = 
[[-1 -4  0]
 [ 0  0 13]
 [ 8  4 21]]
Non-negative entries in margins matrix = 
[[0 0 0]
 [0 0 1]
 [1 1 1]]
Adjusted margins matrix = 
[[ 0  0 -2]
 [-1 -1  1]
 [ 1  1  1]]


In [31]:
# Calculate the gradient of the average loss w.r.t. the weights matrix
dW = (1/num_samples) * np.dot(M, X.T)
print('Gradient matrix = ')
print(dW)

Gradient matrix = 
[[-1.33333333 -2.66666667 -2.66666667 -1.33333333]
 [-1.          0.33333333  0.66666667  0.        ]
 [ 2.33333333  2.33333333  2.          1.33333333]]
