In [None]:
# Run the following cell if using Google Colab

from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/cs340/project/models

!pip install git+https://github.com/openai/CLIP.git
!pip install cuml-cu12

In [None]:
import clip_feature_extractor
import numpy as np

from cuml.svm import SVC
from cuml.metrics import accuracy_score
import cupy as cp
import cudf

# Enable IPython's autoreload extension in mode 2 (reload all modules before
# running each cell) so edits to local helper modules take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Load the CIFAR-100 train/test feature matrices and labels through the
# project's helper module (presumably CLIP embeddings, judging by the module
# name -- confirm in clip_feature_extractor).
(
    X_train_CIFAR100,
    y_train_CIFAR100,
    X_test_CIFAR100,
    y_test_CIFAR100,
) = clip_feature_extractor.get_CIFAR100_features()


Files already downloaded and verified
Files already downloaded and verified
Extracting features from CIFAR100 dataset
Loaded previously extracted features from disk.


In [None]:
def train_svm_cuml(X_train, y_train, X_test, y_test, C=1.0, kernel='rbf', degree=3, gamma='scale'):
    """
    Train a Support Vector Machine classifier using RAPIDS cuML with GPU acceleration
    and evaluate it on a held-out test set.

    Parameters:
    - X_train (array-like, np.ndarray or cp.ndarray): Training feature data.
    - y_train (array-like, np.ndarray or cp.ndarray): Training labels.
    - X_test (array-like, np.ndarray or cp.ndarray): Testing feature data.
    - y_test (array-like, np.ndarray or cp.ndarray): Testing labels.
    - C (float): Regularization parameter. The strength of the regularization is inversely proportional to C.
    - kernel (str): Specifies the kernel type to be used in the algorithm.
    - degree (int): Degree of the polynomial kernel function ('poly'). Ignored by all other kernels.
    - gamma (str or float): Kernel coefficient for 'rbf', 'poly', and 'sigmoid'.

    Returns:
    A 3-tuple ``(accuracy, accuracy_percentage, model)``:
    - accuracy (float): Classification accuracy on the test set (0 to 1).
    - accuracy_percentage (float): Classification accuracy in percentage (0 to 100).
    - model (cuml.svm.SVC): Trained SVM model.

    Raises:
    - ValueError: If a split contains no samples, or if the number of feature
      rows and the number of labels in a split differ.

    Notes:
    - Features are cast to float32 and labels to int32 before training.
    - Inputs that are already CuPy arrays of the target dtype are used as-is
      (no extra GPU copy is made).
    """

    def to_device(data, dtype):
        """Return `data` as a CuPy array of `dtype`, copying only when required."""
        # cp.asarray transfers host data and casts in a single step; unlike an
        # unconditional .astype(), it hands back the input itself when it is
        # already a CuPy array of the requested dtype, saving GPU memory.
        return cp.asarray(data, dtype=dtype)

    def check_split(split_name, features, labels):
        """Fail early, with a readable message, on empty or misaligned splits."""
        n_samples = features.shape[0] if features.ndim else 0
        if n_samples == 0:
            raise ValueError(f"{split_name} split contains no samples.")
        # Slicing the shape (instead of indexing) also rejects 0-d label input.
        if labels.shape[:1] != (n_samples,):
            raise ValueError(
                f"{split_name} split has {n_samples} feature rows but labels "
                f"of shape {tuple(labels.shape)}."
            )

    # Move everything to the GPU with the dtypes used for training
    X_train = to_device(X_train, cp.float32)
    X_test = to_device(X_test, cp.float32)
    y_train = to_device(y_train, cp.int32)
    y_test = to_device(y_test, cp.int32)

    # Validate before any (potentially long) GPU training starts
    check_split("Training", X_train, y_train)
    check_split("Test", X_test, y_test)

    # Initialize the SVM classifier
    svm = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)

    # Fit the model on the training data
    svm.fit(X_train, y_train)

    # Predict on the test data
    y_pred = svm.predict(X_test)

    # Calculate accuracy; normalise to a built-in float so callers always get
    # the documented return type regardless of the metric's scalar type
    accuracy = float(accuracy_score(y_test, y_pred))
    accuracy_percentage = accuracy * 100

    return accuracy, accuracy_percentage, svm

In [None]:
# Train the SVM on the CIFAR-100 features with the default hyperparameters
# and score it on the test split.
accuracy, accuracy_percentage, model = train_svm_cuml(
    X_train=X_train_CIFAR100,
    y_train=y_train_CIFAR100,
    X_test=X_test_CIFAR100,
    y_test=y_test_CIFAR100,
)