Let's implement some basic classifiers
- KNN
- SVM
- Softmax

# Loading test data

Let's use the CIFAR-10 dataset to test all these classifiers! Why? It's a classic!

In [None]:
from keras.datasets import cifar10
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the CIFAR-10 train/test splits (images and their labels).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Stack both splits along the sample axis so the demos below can carve
# out their own train/test partitions.
X = np.concatenate([x_train, x_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

# K Nearest Neighbors

Let's first build a simple Nearest Neighbors classifier.

A nearest neighbor classifier computes the L1 or L2 distance between a test image and every image in its training set, then assigns the test image the label of the training image with the shortest distance (the implementation below uses L2). Let's implement it!

In [None]:
import numpy as np
from tqdm import tqdm

class NearestNeighbor():
    """1-nearest-neighbour classifier using Euclidean (L2) distance.

    "Training" only memorises the data; all the work happens in
    :meth:`predict`, which labels each query with the label of its closest
    training sample.
    """

    def __init__(self,):
        self.training_data = []
        self.training_labels= []

    # Yep, that's all it does during training
    def train(self, X, y):
        """Store the training samples ``X`` and their labels ``y``."""
        self.training_data = X
        self.training_labels = y

    def compute_distance(self, X1, X2):
        """Return the L2 distance between two samples of equal shape.

        Inputs are promoted to float64 before subtracting: on unsigned
        integer pixel data (e.g. uint8 images) the difference and its square
        would otherwise wrap around and yield a wrong distance.
        """
        diff = np.asarray(X1, dtype=np.float64) - np.asarray(X2, dtype=np.float64)
        return np.sqrt(np.sum(diff ** 2))

    def predict(self, X_test):
        """Return the label of the nearest training sample for each query.

        Args:
            X_test: sequence/array of samples; each sample must have the
                same number of elements as a training sample.

        Returns:
            Array of labels indexed like ``X_test``; each entry has the same
            shape as one entry of the stored training labels.

        Raises:
            ValueError: if no training data has been stored.
        """
        train = np.asarray(self.training_data, dtype=np.float64)
        if train.shape[0] == 0:
            raise ValueError(
                "train() must be called with at least one sample before predict()"
            )
        # Flatten every sample to a vector so images of any rank are handled.
        train = train.reshape(train.shape[0], -1)
        queries = np.asarray(X_test, dtype=np.float64)
        queries = queries.reshape(queries.shape[0], train.shape[1])

        # Squared distance has the same argmin as the distance itself, so the
        # square root is skipped. One broadcast per query keeps memory bounded.
        nearest = np.array(
            [np.argmin(np.sum((train - query) ** 2, axis=1)) for query in queries],
            dtype=np.intp,
        )
        return np.asarray(self.training_labels)[nearest]


In [None]:
# Hold out 20% of the first 2000 samples for evaluation; the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X[:2000], y[:2000], test_size=0.2, random_state=42)

# Initialize and train our Nearest Neighbor classifier
nn = NearestNeighbor()
nn.train(X_train, y_train)

# Evaluate: predict the held-out samples and report the accuracy in percent
y_pred = nn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Now let's do **K Nearest Neighbors**

In [None]:
from collections import Counter

import numpy as np
from tqdm import tqdm

class KNearestNeighbors():
    """k-nearest-neighbours classifier using Euclidean (L2) distance.

    "Training" only memorises the data; :meth:`predict` labels each query by
    majority vote among its ``k`` closest training samples.
    """

    def __init__(self,):
        self.training_data = []
        self.training_labels= []

    # Yep, that's all it does during training
    def train(self, X, y):
        """Store the training samples ``X`` and their labels ``y``."""
        self.training_data = X
        self.training_labels = y

    def compute_distance(self, X1, X2):
        """Return the L2 distance between two samples of equal shape.

        Inputs are promoted to float64 before subtracting: on unsigned
        integer pixel data (e.g. uint8 images) the difference and its square
        would otherwise wrap around and yield a wrong distance.
        """
        diff = np.asarray(X1, dtype=np.float64) - np.asarray(X2, dtype=np.float64)
        return np.sqrt(np.sum(diff ** 2))

    def predict(self, X_test, k=3):
        """Predict a label for every sample in ``X_test``.

        Args:
            X_test: sequence/array of samples; each sample must have the
                same number of elements as a training sample.
            k: number of neighbours that vote (must be >= 1). If ``k``
                exceeds the number of training samples, all of them vote.

        Returns:
            List with one scalar label per query. Vote ties go to the label
            whose first supporter is closest to the query.

        Raises:
            ValueError: if ``k < 1`` or no training data has been stored.
        """
        if k < 1:
            raise ValueError("k must be at least 1")

        train = np.asarray(self.training_data, dtype=np.float64)
        if train.shape[0] == 0:
            raise ValueError(
                "train() must be called with at least one sample before predict()"
            )
        # Flatten every sample to a vector so images of any rank are handled.
        train = train.reshape(train.shape[0], -1)
        # Unwrap single-element label arrays (e.g. rows of an (N, 1) label
        # column) into hashable scalars once, up front.
        labels = [np.asarray(label).item() for label in self.training_labels]

        predictions = []
        for i in tqdm(range(len(X_test))):
            query = np.asarray(X_test[i], dtype=np.float64).reshape(-1)
            dists = np.sqrt(np.sum((train - query) ** 2, axis=1))
            # Stable sort: equidistant samples keep their training order.
            nearest = np.argsort(dists, kind="stable")[:k]
            votes = Counter(labels[j] for j in nearest)
            # max() returns the first-inserted key among equal counts, i.e.
            # the tied label with the closest supporter.
            predictions.append(max(votes, key=votes.get))

        return predictions

In [None]:
# Same split as before: 20% of the first 2000 samples held out, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X[:2000], y[:2000], test_size=0.2, random_state=42)

# New KNN + train
knn = KNearestNeighbors()
knn.train(X_train, y_train)

# Eval: predict() is called with its default k=3
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


That's slightly better! Let's move on to more advanced classifiers, specifically 
- Support Vector Machines
- Softmax Classifiers

They function in the same way (computing scores as Wx + b and iteratively refining the weights), but with different loss functions

# Generalized Structure for linear model classifiers

We can define a class for both, which will share the general update structure, but utilize different loss + gradient calculations. The shared functionality will be in 
- Defining hyperparameters (learning rate, epochs)
- Training
- Predicting

They will differ in their
- Loss functions
- Gradient functions


In [None]:
class LinearClassifier(object):
    """Base class for linear classifiers trained with minibatch SGD.

    This class owns the weight matrix ``W`` and the shared train/predict
    logic. Subclasses define the objective by overriding :meth:`loss`.
    """

    def __init__(self):
        # Weight matrix of shape (dim, num_classes); created lazily by train().
        self.W = None

    def train(
        self,
        X,
        y,
        learning_rate=1e-3,
        reg=1e-5,
        num_iters=100,
        batch_size=200,
        verbose=False,
    ):
        """Fit ``W`` with stochastic gradient descent.

        Args:
            X: array of shape (num_train, dim), one sample per row.
            y: integer class labels, indexable like the rows of ``X``; the
                number of classes is taken to be ``max(y) + 1``.
            learning_rate: step size of each gradient update.
            reg: regularization strength forwarded to :meth:`loss`.
            num_iters: number of SGD steps.
            batch_size: samples drawn (with replacement) per step.
            verbose: if true, print the loss every 100 iterations.

        Returns:
            List with the minibatch loss of every iteration.
        """
        num_train, dim = X.shape
        if self.W is None:
            # Small random initialisation; an existing W is kept so that
            # train() can be called again to continue optimising.
            num_classes = np.max(y) + 1
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # SGD
        loss_history = []
        for it in range(num_iters):
            # np.random.choice samples with replacement by default.
            indices = np.random.choice(num_train, batch_size)
            X_batch = X[indices]
            y_batch = y[indices]

            # Evaluate the objective on the minibatch and step downhill.
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)
            self.W -= learning_rate * grad

            if verbose and it % 100 == 0:
                print("iteration %d / %d: loss %f" % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        """Return the highest-scoring class index for every row of ``X``.

        ``W`` must already be set (normally by :meth:`train`).
        """
        return np.argmax(X @ self.W, axis=1)

    def loss(self, X_batch, y_batch, reg):
        """Return ``(loss, gradient_wrt_W)`` for one minibatch.

        Must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class. Failing here
                gives a clear error instead of train() trying to unpack None.
        """
        raise NotImplementedError(
            "LinearClassifier subclasses must implement loss()"
        )


## Some more data processing!
woohoo

In [None]:

# Reload the CIFAR-10 splits; this rebinds X_train / y_train / X_test /
# y_test, replacing the small subsets used in the nearest-neighbour demos.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Sanity-check the array shapes before slicing them up
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

In [None]:
num_training = 49000
num_validation = 1000
num_test = 1000
num_dev = 500

# Validation set: the num_validation samples right after the training range
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]
y_val = y_train[mask]

# Training set: the first num_training samples
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]

# Dev set (overkill): a small random subset of the training set
mask = np.random.choice(num_training, num_dev, replace=False)
X_dev = X_train[mask]
y_dev = y_train[mask]

# Test set: the first num_test samples of the original test split
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Drop the singleton label axis so labels are 1-D and can be used directly
# as column indices by the loss functions. y_dev is included so that it
# matches the other label arrays.
y_train = y_train.squeeze()
y_val = y_val.squeeze()
y_test = y_test.squeeze()
y_dev = y_dev.squeeze()

In [None]:
# Flatten every image into a single row vector (one row per sample).
X_train = X_train.reshape(X_train.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
X_dev = X_dev.reshape(X_dev.shape[0], -1)

# Report the resulting 2-D shapes
print('Training data shape: ', X_train.shape)
print('Validation data shape: ', X_val.shape)
print('Test data shape: ', X_test.shape)
print('dev data shape: ', X_dev.shape)


In [None]:
# Preprocessing: subtract the mean image of the training set.
# Convert to float first: subtracting in an unsigned integer dtype wraps
# around modulo 256 instead of producing negative values, and truncating the
# mean to uint8 throws away its fractional part.
X_train = X_train.astype(np.float64)
X_val = X_val.astype(np.float64)
X_test = X_test.astype(np.float64)
X_dev = X_dev.astype(np.float64)

mean_image = np.mean(X_train, axis=0)

X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# Subsume the bias dimension: append a constant-1 feature so the bias becomes
# one more row of W and the classifiers only need to compute X @ W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

In [None]:
# Inspect the scalar type of a single feature value after preprocessing
type(X_train[0][0])

All set for the classifiers

## SVM Loss
Now we can define the SVM loss + gradient to use with the above structure and so on.

Loss is $L_i = \sum_{j \neq y_i}\max(0, s_j - s_{y_i} + \Delta)$
- Get the classes
- Calculate the hinge loss for each
- Sum


In [None]:

def svm_loss_vectorized(W, X, y, reg):
    """Multiclass SVM (hinge) loss and its gradient, fully vectorized.

    Args:
        W: weight matrix of shape (dim, num_classes).
        X: minibatch of shape (N, dim).
        y: length-N sequence of integer class labels.
        reg: L2 regularization strength.

    Returns:
        Tuple ``(loss, dW)`` with the scalar loss (margin of 1, averaged over
        the batch, plus ``reg * sum(W**2)``) and the gradient with respect
        to ``W``.
    """
    num_samples = len(y)
    rows = range(num_samples)

    scores = X @ W
    correct_scores = scores[rows, y].reshape(-1, 1)
    hinge = np.maximum(0, scores - correct_scores + 1)

    # Each sample's own class contributes a margin of exactly 1 to the sum;
    # subtracting 1 from the batch average removes that contribution.
    data_loss = hinge.sum() / num_samples - 1
    loss = data_loss + reg * np.sum(W**2)

    # Indicator of active margins. The true-class entry starts at 1 (its
    # margin is 1 > 0), so subtracting the row sum leaves minus the number of
    # violating classes there.
    active = (hinge > 0).astype(int)
    active[rows, y] -= active.sum(axis=1)

    dW = X.T @ active / num_samples + 2 * reg * W
    return loss, dW

class LinearSVM(LinearClassifier):
    """Linear classifier trained with the multiclass SVM (hinge) loss."""

    def loss(self, X_batch, y_batch, reg):
        """Return ``(loss, gradient)`` of the SVM objective on one minibatch."""
        return svm_loss_vectorized(W=self.W, X=X_batch, y=y_batch, reg=reg)



In [None]:
# Train a linear SVM using train()'s default hyperparameters
SVM = LinearSVM()
SVM.train(X_train, y_train)
predictions = SVM.predict(X_test)
# Fraction of test samples whose predicted class equals the label
print(f"{np.sum(predictions==y_test) / len(predictions)} accuracy")


## Softmax class


In [None]:

def softmax_loss_vectorized(W, X, y, reg):
    """Softmax (cross-entropy) loss and its gradient, fully vectorized.

    Args:
        W: weight matrix of shape (dim, num_classes).
        X: minibatch of shape (N, dim).
        y: length-N sequence of integer class labels.
        reg: L2 regularization strength.

    Returns:
        Tuple ``(loss, dW)`` with the scalar loss (mean cross-entropy plus
        ``reg * sum(W**2)``) and the gradient with respect to ``W``.
    """
    N = X.shape[0]
    rows = range(N)
    scores = X @ W

    # Shift each row by its own maximum. Softmax is unchanged by a per-row
    # constant, and this guarantees one exp() per row equals 1. Shifting by
    # the global maximum instead can underflow a whole row to zeros and turn
    # the normalisation into 0/0.
    shifted = scores - scores.max(axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    row_sums = exp_scores.sum(axis=1, keepdims=True)

    # Log-probabilities via log-sum-exp: avoids log(0) when the true class
    # has a probability that underflows to zero.
    log_probs = shifted - np.log(row_sums)
    loss = -log_probs[rows, y].sum() / N + reg * np.sum(W**2)

    # Gradient of the cross-entropy w.r.t. the scores is (P - one_hot(y)).
    probs = exp_scores / row_sums
    probs[rows, y] -= 1
    dW = X.T @ probs / N + 2 * reg * W
    return loss, dW

class Softmax(LinearClassifier):
    """Linear classifier trained with the softmax (cross-entropy) loss."""

    def loss(self, X_batch, y_batch, reg):
        """Return ``(loss, gradient)`` of the softmax objective on one minibatch."""
        return softmax_loss_vectorized(W=self.W, X=X_batch, y=y_batch, reg=reg)

In [None]:
# Train a softmax classifier using train()'s default hyperparameters
SM = Softmax()
SM.train(X_train, y_train)
predictions = SM.predict(X_test)
# Fraction of test samples whose predicted class equals the label
print(f"{np.sum(predictions==y_test) / len(predictions)} accuracy")
