## Loading the data

In [1]:
import torch
from sklearn.datasets import load_breast_cancer
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import mode
import random

# Load the breast cancer dataset
cancer = load_breast_cancer()
X_cancer = cancer['data']
y_cancer = cancer['target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_cancer, y_cancer, test_size=0.2, random_state=42)

X_train_mu = np.mean(X_train, axis=0)
X_train_std = np.std(X_train, axis=0)
X_train_normalized = (X_train - X_train_mu) / X_train_std
X_test_normalized = (X_test - X_train_mu ) / X_train_std

## Coding the class in PyTorch to use CUDA and evaluate the code on GPU, instead of CPU

In [2]:
class CudaKNNClassifier(torch.nn.Module):
    def __init__(self, k=5):
        super(CudaKNNClassifier, self).__init__()
        self.k = k
        self.X_train = None
        self.y_train = None

    def _validate_input(self, X, y=None):
        if not isinstance(X, torch.Tensor):
            raise ValueError("Input data must be a PyTorch tensor.")
        if y is not None and X.shape[0] != y.shape[0]:
            raise ValueError("Number of samples in X and y must be equal.")

    def fit(self, X_train, y_train):
        self._validate_input(X_train, y_train)
        self.X_train = X_train.to(self.device)  # Move to device (CPU/GPU)
        self.y_train = y_train.to(self.device)  # Move to device (CPU/GPU)

    def predict(self, X_test):
        self._validate_input(X_test, None)

        # Ensure X_train and y_train are on the same device as X_test
        if self.X_train.device != X_test.device:
            self.X_train = self.X_train.to(X_test.device)
            self.y_train = self.y_train.to(X_test.device)

        # Distance calculation, neighbor finding, voting
        X_trainNew = self.X_train[None, :, :] - X_test[:, None, :]
        d_testTrain = torch.sqrt(torch.sum(torch.pow(X_trainNew, 2), dim=2))
        idxs = torch.argsort(d_testTrain, dim=1)

        y_train_casted = self.y_train.repeat(X_trainNew.shape[0], 1)

        y_train_sorted = torch.gather(y_train_casted, dim=1, index=idxs)
        y_train_topK = y_train_sorted[:, :self.k]
        y_test_pred = torch.mode(y_train_topK, dim=1)[0]

        return y_test_pred

    def _voting_func(self, arr):
        return torch.mode(arr)[0]

    @property
    def device(self):
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Example Usage
X_train = torch.tensor(X_train_normalized, dtype=torch.float)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test_normalized, dtype=torch.float)
y_test = torch.tensor(y_test, dtype=torch.long)

cudaModelKNN = CudaKNNClassifier(k=5)
cudaModelKNN.fit(X_train, y_train)
y_pred = cudaModelKNN.predict(X_test)
np.mean(y_pred.cpu().numpy() == y_test.cpu().numpy())

0.9473684210526315

## Using for loop for each sample

In [4]:
class MyKNN(torch.nn.Module):
    def __init__(self, X_train, y_train, k=3):
        super(MyKNN, self).__init__()
        self.k = k
        self.X_train = X_train.clone().detach().to(self.device, dtype=torch.float)
        self.y_train = y_train.clone().detach().to(self.device, dtype=torch.long)

    @property
    def device(self):
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def _euclidean_distance(self, X, sample):
        sample = sample.unsqueeze(0)  # Shape (1, feature_dim)
        squared_diff = torch.sum((X - sample) ** 2, dim=1)
        return torch.sqrt(squared_diff)

    def _validate_input(self, X, y=None):
        if not isinstance(X, torch.Tensor):
            raise ValueError("Input data must be a PyTorch tensor.")
        if y is not None and X.shape[0] != y.shape[0]:
            raise ValueError("Number of samples in X and y must be equal.")

    def predict_sample(self, sample, type='euclidean', n=None):
        if type != 'euclidean':
            raise ValueError("Only Euclidean distance is supported in PyTorch implementation.")

        distances = self._euclidean_distance(self.X_train, sample)

        _, indices = torch.sort(distances, dim=0, descending=True)  # Sort by descending distance
        y_train_sorted = self.y_train[indices]
        y_train_neighbors = y_train_sorted[:self.k]
        unique_labels, counts = torch.unique(y_train_neighbors, return_counts=True)
        return unique_labels[torch.argmax(counts)]

    def predict_dataset(self, dataset, type='euclidean', n=None):
        # Convert dataset to PyTorch tensor and move to device
        dataset_tensor = dataset.clone().detach().to(device=self.device, dtype=torch.float)
        # Call predict_sample on each sample in the dataset
        predictions = torch.stack([self.predict_sample(x) for x in dataset_tensor])
        return predictions.cpu().numpy()  # Convert back to NumPy array

In [5]:
# Example Usage
modelKNNLoop = MyKNN(X_train, y_train, k=3)
y_pred = modelKNNLoop.predict_dataset(X_test)

## Benchmarking

In [6]:
k = 21
features = 25
train_samples = 800
test_samples = 200

X_train = torch.rand(train_samples, features)
X_train = (X_train - torch.mean(X_train, dim=0)) / torch.std(X_train, dim=0)
y_train = torch.zeros(train_samples, dtype=torch.long)

X_test = torch.rand(test_samples, features)
X_test = (X_test - torch.mean(X_test, dim=0)) / torch.std(X_test, dim=0)
y_test = torch.zeros(test_samples, dtype=torch.long)

# Example Usage Loop
modelKNNLoop = MyKNN(X_train, y_train, k=k)
y_pred = modelKNNLoop.predict_dataset(X_test)

# Example Usage CUDA
cudaModelKNN = CudaKNNClassifier(k=k)
cudaModelKNN.fit(X_train, y_train)
y_pred = cudaModelKNN.predict(X_test)

In [7]:
print(f"KNNPredictor with for loop (k={k}): ")
%timeit -n 10 modelKNNLoop.predict_dataset(X_test)

print(f"cudaModel KNN (k={k}): ")
%timeit -n 10 cudaModelKNN.predict(X_test)

KNNPredictor with for loop (k=21): 
102 ms ± 23.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
cudaModel KNN (k=21): 
41 ms ± 3.83 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### And with a lower k:

In [8]:
k = 3
# Example Usage Loop
modelKNNLoop = MyKNN(X_train, y_train, k=k)
y_pred = modelKNNLoop.predict_dataset(X_test)

# Example Usage CUDA
cudaModelKNN = CudaKNNClassifier(k=k)
cudaModelKNN.fit(X_train, y_train)
y_pred = cudaModelKNN.predict(X_test)

In [9]:
print(f"KNNPredictor with for loop (k={k}): ")
%timeit -n 10 modelKNNLoop.predict_dataset(X_test)

print(f"cudaModel KNN (k={k}): ")
%timeit -n 10 cudaModelKNN.predict(X_test)

KNNPredictor with for loop (k=3): 
54.6 ms ± 6.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
cudaModel KNN (k=3): 
28.7 ms ± 779 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### And with a k=1

In [10]:
k = 1
# Example Usage Loop
modelKNNLoop = MyKNN(X_train, y_train, k=k)
y_pred = modelKNNLoop.predict_dataset(X_test)

# Example Usage CUDA
cudaModelKNN = CudaKNNClassifier(k=k)
cudaModelKNN.fit(X_train, y_train)
y_pred = cudaModelKNN.predict(X_test)

In [11]:
print(f"KNNPredictor with for loop (k={k}): ")
%timeit -n 10 modelKNNLoop.predict_dataset(X_test)

print(f"cudaModel KNN (k={k}): ")
%timeit -n 10 cudaModelKNN.predict(X_test)

KNNPredictor with for loop (k=1): 
52.5 ms ± 4.16 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
cudaModel KNN (k=1): 
39.1 ms ± 5.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
