Let's implement some basic classifiers
- KNN
- SVM
- Softmax

# Loading test data

Let's use the Cifar10 dataset to test all these classifiers! Why? It's a classsic!

In [2]:
from keras.datasets import cifar10
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Combine them for demo purposes
X = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))

# K Nearest Neighbors

Let's first build a simple Nearest Neighbors classifier.

A nearest neighbor classifier just computes the L1/L2 distance between an image and a set of images in its training class, assigning the output to the label of the image with the shortest distance. Let's implement it! 

In [11]:
import numpy as np
from tqdm import tqdm

class NearestNeighbor():
    def __init__(self,):
        self.training_data = []
        self.training_labels= []


    # Yep, that's all it does during training
    def train(self, X, y):
        self.training_data = X
        self.training_labels = y
    
    def compute_distance(self, X1, X2):
        return np.sqrt(np.sum((X1-X2)**2))
    
    
    def predict(self, X_test):
        distances = np.array([[np.linalg.norm(x_test - x_train) for x_train in self.training_data] for x_test in X_test])
        return np.array(self.training_labels)[distances.argmin(axis=1)]


In [12]:
X_train, X_test, y_train, y_test = train_test_split(X[:2000], y[:2000], test_size=0.2, random_state=42)

# Initialize and train our Nearest Neighbor classifier
nn = NearestNeighbor()
nn.train(X_train, y_train)

# Predict on the test set
y_pred = nn.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 17.00%


Now let's do **K Nearest Neighbors**

In [23]:
from collections import Counter

import numpy as np
from tqdm import tqdm

class KNearestNeighbors():
    def __init__(self,):
        self.training_data = []
        self.training_labels= []


    # Yep, that's all it does during training
    def train(self, X, y):
        self.training_data = X
        self.training_labels = y
    
    def compute_distance(self, X1, X2):
        return np.sqrt(np.sum((X1-X2)**2))
    
    
   
    def predict(self, X_test, k=3):
        predictions = []

        for i in tqdm(range(len(X_test))):
            dist_label_pairs = []

            for j in range(len(self.training_data)):
                dist = self.compute_distance(X_test[i], self.training_data[j])
                dist_label_pairs.append((dist, self.training_labels[j]))

            k_nearest = sorted(dist_label_pairs, key=lambda x: x[0])[:k]

            label_count = {}
            for _, label in k_nearest:
                # Assuming label is a numpy.ndarray with a single value:
                # Convert ndarray to a hashable type
                if label.shape:  # Check if label is an array with elements
                    label = label.item()  # Converts a one-element array to a scalar

                if label in label_count:
                    label_count[label] += 1
                else:
                    label_count[label] = 1

            best_label = max(label_count, key=label_count.get)
            predictions.append(best_label)

        return predictions

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X[:2000], y[:2000], test_size=0.2, random_state=42)

# Initialize and train our Nearest Neighbor classifier
knn = KNearestNeighbors()
knn.train(X_train, y_train)

# Predict on the test set
y_pred = knn.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


100%|█████████████████████████████████████████| 400/400 [00:06<00:00, 65.35it/s]

Accuracy: 19.50%





That's slightly better! Let's move on to more advanced classifiers, specifically 
- Support Vector Machines
- Softmax Classifiers

They function in the same way (calculating wx + b and refining the layers), but with different loss values

# Generalized Structure for linear model classifiers

We can define a class for both, which will share the general update structure, but utilize different loss + gradient calculations. The shared functionality will be in 
- Defining hyperparameters (learning rate, epochs)
- Training
- Predicting

They will differ in their
- Loss functions
- Gradient functions


In [62]:
import numpy as np



class GeneralizedLinearModel:
    def __init__(self, learning_rate=0.01, epochs=1000, svm_loss_gradient=None):
        self.learning_rate=learning_rate
        self.epochs = epochs
        self.svm_loss_gradient = svm_loss_gradient
        
    def train(self, X, y):
        size, features = X.shape
        X = np.hstack([X, np.ones((size, 1))])
        
        # Simple linear model (weights.shape = no. of features, bias.shape = 1)
        self.weights = np.zeros(features + 1)
        
        for _ in range(self.epochs):
            output = np.dot(X, self.weights) 
            
            
            loss, dw = svm_loss_gradient(self.weights, X, y, output, 0.01)
            
            self.weights -= dW * self.learning_rate   
            
            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')
                
    def predict(self, X):
        X = np.hstack([X, np.ones((size, 1))])
        return np.dot(X, self.weights) 
            

def svm_loss_gradient(W, X, y, output, reg):
    N = X.shape[0]
    
    # Get the scores of the correct labels
    correct_scores = output[np.arange(N), y][:, np.newaxis]
    
    # Calculate margins
    margins = np.maximum(0, output - correct_scores + delta)
    margins[np.arange(N), y] = 0  # Do not consider correct class in margins

    # Compute loss: data loss + regularization loss
    loss = np.sum(margins) / N
    loss += reg * np.sum(W * W)
    
    # Compute gradient
    binary = margins > 0
    binary[np.arange(N), y] = -np.sum(binary, axis=1)

    dW = np.dot(X.T, binary)
    dW /= N
    dW += 2 * reg * W  # Gradient of regularization term

    return loss, dW
      

## SVM Loss
Now we can define the SVM loss + gradient to use with the above structure and so on.

Loss is $L_i = \sum_{j \neq y_i}\text{max}(0,s_j-s_{y_j}+\Delta)$
- Get the classes
- Calculate the hinge loss for each
- Sum


In [45]:
# SVM uses hinge loss to define the loss

# Predefine delta
delta = 1

def svm_loss_gradient(W, X, y, output, reg):
    N = X.shape[0]
    
    # Get the scores of the correct labels
    correct_scores = output[np.arange(N), y][:, np.newaxis]
    
    # Calculate margins
    margins = np.maximum(0, output - correct_scores + delta)
    margins[np.arange(N), y] = 0  # Do not consider correct class in margins

    # Compute loss: data loss + regularization loss
    loss = np.sum(margins) / N
    loss += reg * np.sum(W * W)
    
    # Compute gradient
    binary = margins > 0
    binary[np.arange(N), y] = -np.sum(binary, axis=1)

    dW = np.dot(X.T, binary)
    dW /= N
    dW += 2 * reg * W  # Gradient of regularization term

    return loss, dW



In [46]:
X_train, X_test, y_train, y_test = train_test_split(X[:2000], y[:2000], test_size=0.2, random_state=42)


SVM = GeneralizedLinearModel(svm_loss_gradient=svm_loss_gradient)

In [47]:
SVM.train(X_train, y_train)

ValueError: too many values to unpack (expected 2)

In [48]:
X_train.shape

(1600, 32, 32, 3)

In [65]:
import numpy as np

class GeneralizedLinearModel:
    def __init__(self, learning_rate=0.01, epochs=1000, svm_loss_gradient=None):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.svm_loss_gradient = self.svm_loss
        
    def train(self, X, y):
         
        size = X.shape[0]
        features = X.shape[1:]
        
        # Initialize weights (features + bias)
        self.weights = np.zeros(features)
        
        for epoch in range(self.epochs):
#             output = np.dot(X, self.weights) 
            
            loss, dW = self.svm_loss(self.weights, X, y, 0.01)
            
            self.weights -= dW * self.learning_rate
            
            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')
                
    def predict(self, X):
        size = X.shape[0]
        X = np.hstack([X, np.ones((size, 1))])  # Add bias term
        return np.dot(X, self.weights)

    def svm_loss(self, W, X, y, reg):
        loss = 0.0
        dW = np.zeros(W.shape)  
        print(W.shape, X.shape)

        N = len(y)     # number of samples
        Y_hat = X @ W  # raw scores matrix

        y_hat_true = Y_hat[range(N), y][:, np.newaxis]    # scores for true labels
        margins = np.maximum(0, Y_hat - y_hat_true + 1)   # margin for each score
        loss = margins.sum() / N - 1 + reg * np.sum(W**2) # regularized loss

        dW = (margins > 0).astype(int)    # initial gradient with respect to Y_hat
        dW[range(N), y] -= dW.sum(axis=1) # update gradient to include correct labels
        dW = X.T @ dW / N + 2 * reg * W   # gradient with respect to W

        return loss, dW

#     def default_svm_loss_gradient(self, W, X, y, output, reg):
#         N = X.shape[0]
#         delta = 1  # Delta value for hinge loss
        
#         correct_scores = output[np.arange(N), y][:, np.newaxis]
#         margins = np.maximum(0, output - correct_scores + delta)
#         margins[np.arange(N), y] = 0
        
#         loss = np.sum(margins) / N
#         loss += reg * np.sum(W * W)
        
#         binary = margins > 0
#         binary[np.arange(N), y] = -np.sum(binary, axis=1)

#         dW = np.dot(X.T, binary)
#         dW /= N
#         dW += 2 * reg * W

#         return loss, dW

# Usage Example
# model = GeneralizedLinearModel()
# model.train(X_train, y_train)
# predictions = model.predict(X_test)


In [67]:
# X_train_flattened = X_train.reshape(X_train.shape[0], -1)  # Flatten the images
from sklearn import datasets

# Load the Iris dataset
# iris = datasets.load_iris()
# X_iris = iris.data  # Features
# y_iris = iris.target  # Target labels

# Now X_iris contains the features and y_iris contains the target labels
X_flattened = X.reshape(X.shape[0], -1)  # This will change the shape to (60000, 3072)

# Then use the flattened data for training
model = GeneralizedLinearModel()
model.train(X_flattened, y)
# predictions = model.predict(X_test)


(3072,) (60000, 3072)


IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed