In [3]:
import numpy as np
from copy import deepcopy
def svm_loss_naive(W, X, y, reg):
  """
  Multiclass (structured) SVM hinge loss and its gradient, computed with
  explicit Python loops over examples and classes.

  With D input features, C classes and a minibatch of N examples:

  Inputs:
  - W: numpy array of shape (D, C) holding the weights.
  - X: numpy array of shape (N, D) holding the minibatch, one example per row.
  - y: sequence of N integer labels; y[i] = c means X[i] belongs to class c,
    with 0 <= c < C.
  - reg: (float) regularization strength.

  Returns a tuple of:
  - the loss: mean hinge loss over the minibatch plus 0.5 * reg * sum(W * W)
  - the gradient of that loss with respect to W, same shape as W
  """
  n_examples = X.shape[0]
  n_classes = W.shape[1]

  grad = np.zeros(W.shape)  # accumulates the data-loss gradient
  total = 0.0               # accumulates the summed hinge loss

  for idx in range(n_examples):
    sample = X[idx]
    label = y[idx]
    class_scores = sample.dot(W)  # one score per class for this example
    target_score = class_scores[label]

    for cls in range(n_classes):
      if cls != label:  # the correct class contributes no hinge term
        violation = class_scores[cls] - target_score + 1  # margin delta = 1
        if violation > 0:
          total += violation
          # A violated margin pushes the offending class's weights along
          # +x and the correct class's weights along -x.
          grad[:, cls] += sample
          grad[:, label] -= sample

  # Turn the sums over examples into averages.
  total /= n_examples
  grad /= n_examples

  # L2 penalty; the 0.5 factor makes its gradient exactly reg * W.
  total += 0.5 * reg * np.sum(W * W)
  grad += reg * W
  return total, grad


In [4]:
# Toy problem for checking the SVM loss: N examples, D features, C classes.
N=2
D=3
C=3
targets = [0,0]  # correct class index of each of the N examples
delta = 1  # hinge-loss margin used by the vectorized computation below
reg = 0  # regularization strength; 0 switches the penalty term off

# Random data generation
# The seed is set immediately before the only draw so W is reproducible.
np.random.seed(123)
W = np.random.randn(D,C)
# Fixed integer inputs are used instead of a random draw:
# X = np.random.randn(N,D)
X = np.array([[1,2,3],[4,5,6]])
X

array([[1, 2, 3],
       [4, 5, 6]])

In [5]:
# Vectorized hinge loss: the same quantity svm_loss_naive builds with loops.
N = len(X)
score_mat = X.dot(W)  # (N, C) class scores
# Column vector holding each example's correct-class score, so it broadcasts
# across the class axis.
correct_class_scores = score_mat[np.arange(N), targets][:, np.newaxis]
margins = score_mat - correct_class_scores + delta
# The correct class never contributes to the loss.
margins[np.arange(N), targets] = 0
# Keep only violated margins (hinge: max(0, margin)).
margins = np.maximum(margins, 0)
dataloss = margins.sum(axis=1).mean()
regularizationloss = 0.5 * reg * np.sum(W * W)
totalloss = dataloss + regularizationloss

In [4]:
# Indicator matrix of violated margins: every positive margin becomes 1,
# all other entries keep their (zero) value.
mask = margins.copy()
mask[margins > 0] = 1
# Placeholder only; the gradient is assigned when it is computed further down.
dW = np.zeros(W.shape)

In [5]:
# Show the inputs, the clipped hinge margins and the weights for inspection.
print(X,"\n\n", margins,"\n\n",W)

[[1 2 3]
 [4 5 6]] 

 [[ 0.         10.93166482 19.76191811]
 [ 0.         25.9569762  44.41878567]] 

 [[-1.0856306   0.99734545  0.2829785 ]
 [-1.50629471 -0.57860025  1.65143654]
 [-2.42667924 -0.42891263  1.26593626]]


In [6]:
# Number of violated margins per example (one count per row of the mask).
row_sum = mask.sum(axis=1)
row_sum

array([2., 2.])

In [7]:
# Gradient coefficients: +1 for every class whose margin is violated and
# -(number of violated margins) for the correct class of each example.
# Work on a copy so `mask` keeps its 0/1 indicator values; mutating it in
# place made this cell non-idempotent (a second run, or re-running the
# row_sum cell afterwards, subtracted the counts again and corrupted dW).
grad_coeff = mask.copy()
grad_coeff[range(N), targets] -= row_sum  # row_sum is 1-D, so no transpose is needed
dW = np.dot(X.T, grad_coeff) / N  # average over the N examples
dW += reg * W  # gradient of 0.5 * reg * sum(W * W)
print(totalloss, dW)

50.534672398328325 [[-5.   2.5  2.5]
 [-7.   3.5  3.5]
 [-9.   4.5  4.5]]


In [8]:
# Cross-check: the loop-based implementation is expected to return the same
# loss and gradient as the vectorized computation printed above.
svm_loss_naive(W,X, targets, reg)

(50.53467239832832, array([[-5. ,  2.5,  2.5],
        [-7. ,  3.5,  3.5],
        [-9. ,  4.5,  4.5]]))

# KNN

In [62]:
# Toy k-NN data: N training points, N_test test points, D integer features.
N = 10
N_test = 5
D = 3
np.random.seed(123)
# The three draws below consume the seeded stream in order; reordering them
# changes every generated value.
X_train = np.random.randint(10,size=(N, D))
# NOTE(review): randint(1, ...) samples from [0, 1), so every label is 0 and
# the labels have shape (N, 1) — presumably placeholders; confirm if real
# labels are ever needed.
y_train = np.random.randint(1,size=(N,1))
X_test = np.random.randint(10,size=(N_test, D))
# Earlier attempt at the pairwise distances using a tiled identity matrix
# (kept for reference, not executed):
# Identity = np.tile(np.identity(N),N_test).T
# tmp1 = Identity.dot(X_train).reshape(N_test,N,D)
# tmp2 = X_test.reshape(N_test,1,D)
# distances = tmp1 - tmp2
# np.sqrt(np.sum(distances**2,axis=2))

In [74]:
# Pairwise Euclidean distances via broadcasting: inserting a middle axis into
# X_test gives (N_test, 1, D), which broadcasts against X_train (N, D) to a
# (N_test, N, D) array of differences; summing squares over the last axis
# leaves one distance per (test, train) pair.
np.sqrt(((X_train - X_test[:, np.newaxis, :]) ** 2).sum(axis=2))

array([[ 7.34846923,  9.43398113,  5.38516481, 10.77032961, 10.86278049,
         7.07106781,  6.164414  ,  6.08276253,  2.23606798,  4.35889894],
       [ 4.24264069,  6.70820393,  5.        , 10.67707825, 10.19803903,
         5.47722558,  2.82842712,  3.31662479,  1.73205081,  3.        ],
       [ 6.40312424,  9.48683298,  1.41421356,  8.66025404,  9.43398113,
         3.        ,  6.40312424,  4.24264069,  3.16227766,  6.63324958],
       [ 6.164414  ,  8.06225775,  5.74456265, 10.48808848, 10.19803903,
         7.07106781,  4.89897949,  5.19615242,  1.73205081,  3.60555128],
       [ 2.        ,  3.31662479,  8.54400375,  9.2736185 ,  7.61577311,
         7.48331477,  4.24264069,  3.60555128,  7.28010989,  8.30662386]])

In [100]:
# Pairwise distances without forming the difference tensor, using
# ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2.
# The result is laid out as (N, N_test): rows are training points, columns
# are test points (the transpose of the broadcast version above).
term1 = (X_train ** 2).sum(axis=1)[:, np.newaxis]  # (N, 1) squared train norms
term2 = 2 * (X_train @ X_test.T)                   # (N, N_test) cross terms
term3 = (X_test ** 2).sum(axis=1)                  # (N_test,) squared test norms
distances = np.sqrt(term1 - term2 + term3)
distances

array([[ 7.34846923,  4.24264069,  6.40312424,  6.164414  ,  2.        ],
       [ 9.43398113,  6.70820393,  9.48683298,  8.06225775,  3.31662479],
       [ 5.38516481,  5.        ,  1.41421356,  5.74456265,  8.54400375],
       [10.77032961, 10.67707825,  8.66025404, 10.48808848,  9.2736185 ],
       [10.86278049, 10.19803903,  9.43398113, 10.19803903,  7.61577311],
       [ 7.07106781,  5.47722558,  3.        ,  7.07106781,  7.48331477],
       [ 6.164414  ,  2.82842712,  6.40312424,  4.89897949,  4.24264069],
       [ 6.08276253,  3.31662479,  4.24264069,  5.19615242,  3.60555128],
       [ 2.23606798,  1.73205081,  3.16227766,  1.73205081,  7.28010989],
       [ 4.35889894,  3.        ,  6.63324958,  3.60555128,  8.30662386]])

In [97]:
# Sanity check: two ways of computing the per-row squared norms of X_train.
# Both produce an (N, 1) column and should print identical values.
print(np.sum(X_train[:,np.newaxis,:]**2, axis=2),
      "\n\n",
       np.sum(X_train**2, axis=1).reshape(-1,1))

[[ 44]
 [ 91]
 [ 37]
 [ 82]
 [ 90]
 [ 16]
 [ 66]
 [ 29]
 [ 69]
 [113]] 

 [[ 44]
 [ 91]
 [ 37]
 [ 82]
 [ 90]
 [ 16]
 [ 66]
 [ 29]
 [ 69]
 [113]]


In [101]:
from k_nearest_neighbor import KNearestNeighbor

In [102]:
# Hand the toy training data to the imported KNearestNeighbor, then display
# what compute_distances_one_loop returns for X_test so it can be compared
# with the distance matrices computed by hand above.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
classifier.compute_distances_one_loop(X_test)

0


array([[ 7.34846923,  9.43398113,  5.38516481, 10.77032961, 10.86278049,
         7.07106781,  6.164414  ,  6.08276253,  2.23606798,  4.35889894],
       [ 4.24264069,  6.70820393,  5.        , 10.67707825, 10.19803903,
         5.47722558,  2.82842712,  3.31662479,  1.73205081,  3.        ],
       [ 6.40312424,  9.48683298,  1.41421356,  8.66025404,  9.43398113,
         3.        ,  6.40312424,  4.24264069,  3.16227766,  6.63324958],
       [ 6.164414  ,  8.06225775,  5.74456265, 10.48808848, 10.19803903,
         7.07106781,  4.89897949,  5.19615242,  1.73205081,  3.60555128],
       [ 2.        ,  3.31662479,  8.54400375,  9.2736185 ,  7.61577311,
         7.48331477,  4.24264069,  3.60555128,  7.28010989,  8.30662386]])

In [107]:
# Split `distances` along its first axis into 3 chunks; array_split accepts a
# row count that is not divisible by 3 and returns chunks of unequal size.
np.array_split(distances, 3)

[array([[ 7.34846923,  4.24264069,  6.40312424,  6.164414  ,  2.        ],
        [ 9.43398113,  6.70820393,  9.48683298,  8.06225775,  3.31662479],
        [ 5.38516481,  5.        ,  1.41421356,  5.74456265,  8.54400375],
        [10.77032961, 10.67707825,  8.66025404, 10.48808848,  9.2736185 ]]),
 array([[10.86278049, 10.19803903,  9.43398113, 10.19803903,  7.61577311],
        [ 7.07106781,  5.47722558,  3.        ,  7.07106781,  7.48331477],
        [ 6.164414  ,  2.82842712,  6.40312424,  4.89897949,  4.24264069]]),
 array([[6.08276253, 3.31662479, 4.24264069, 5.19615242, 3.60555128],
        [2.23606798, 1.73205081, 3.16227766, 1.73205081, 7.28010989],
        [4.35889894, 3.        , 6.63324958, 3.60555128, 8.30662386]])]

In [109]:
import itertools

In [135]:
# Cross-validation bookkeeping demo: each fold takes one turn as the
# validation fold while the remaining folds form the training set.
num_folds = 5
for fold in range(num_folds):
    val_idx = {fold}
    train_idx = set(range(num_folds)) - val_idx
    print(val_idx, train_idx)
    # Index a toy array with each index set to show the resulting split.
    print(np.arange(5)[list(val_idx)])
    print(np.arange(5)[list(train_idx)])

{0} {1, 2, 3, 4}
[0]
[1 2 3 4]
{1} {0, 2, 3, 4}
[1]
[0 2 3 4]
{2} {0, 1, 3, 4}
[2]
[0 1 3 4]
{3} {0, 1, 2, 4}
[3]
[0 1 2 4]
{4} {0, 1, 2, 3}
[4]
[0 1 2 3]


In [138]:
# Accuracy idiom: the mean of an elementwise equality test is the fraction
# of matching entries.
a = np.array([1, 2, 1])
b = np.array([1, 1, 1])
(a == b).mean()

0.6666666666666666

# Softmax

In [172]:
# Tiny softmax example: one input row with D = 2 features and C = 3 classes.
# The weights are written one class per row and then transposed to (D, C).
W = np.transpose(np.array([[1, 0], [0, 1], [1, 1]]))  # shape (D,C)
X = np.array([[1, 2]])  # shape (N,D)
y = 1  # index of the correct class for the single example
num_classes = np.shape(W)[1]
print(W.shape, X.shape)

(2, 3) (1, 2)


In [180]:
# Cross-entropy (softmax) loss of the single example in X with label y.
scores = X.dot(W)  # shape (N,C)
# Numerical stability: softmax is unchanged when a constant is added to every
# score of a row, so shift each row to make its largest score 0. This keeps
# np.exp from overflowing. (The constant must be -max(scores); adding
# log(num_classes) leaves the result unchanged but gives no protection.)
scores = scores - np.max(scores, axis=1, keepdims=True)
scores_exp = np.exp(scores)
# Normalize each row separately so the probabilities of one example sum to 1.
scores_norm = scores_exp / np.sum(scores_exp, axis=1, keepdims=True)
# y is a single class index, so this picks that class's probability in the
# first (here: only) row.
loss = -np.log(scores_norm.ravel()[y])

In [181]:
# Display the softmax cross-entropy loss computed in the previous cell.
loss

1.4076059644443804