# **Image Classification using Support Vector Machines**

In [12]:
import time
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Subset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import BaggingClassifier

# Select the compute device: CUDA when PyTorch reports one, CPU otherwise,
# then print which of the two cases applies.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"CUDA is {'' if torch.cuda.is_available() else 'not '}available")

CUDA is not available


## **Load CIFAR-10**

In [2]:
# One preprocessing pipeline shared by both splits: convert each image to a
# tensor, then normalize every RGB channel with mean 0.5 and std 0.5.
# NOTE(review): the later cells shown here read `.data` directly rather than
# indexing the dataset, so this transform may never run for them -- confirm.
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split, downloaded into ./data2 if it is not already there.
train_dataset = CIFAR10(root="./data2", train=True, download=True, transform=cifar_transform)

# Test split, downloaded into ./test2 if it is not already there.
test_dataset = CIFAR10(root="./test2", train=False, download=True, transform=cifar_transform)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Sanity check before preprocessing: image-array shape on the first line,
# label-array shape on the second.
print(train_dataset.data.shape, np.array(train_dataset.targets).shape, sep="\n")

(50000, 32, 32, 3)
(50000,)


In [5]:
# Rescale pixel values with x / 255 * 2 - 1, which maps 0..255 onto -1.0..1.0.
# The division turns the array into floats, so the dtype guard makes this cell
# idempotent: on a re-run the data is no longer uint8 and is left alone
# instead of being rescaled a second time.
# NOTE(review): assumes the freshly loaded `.data` arrays are uint8 -- confirm.
for dataset in (train_dataset, test_dataset):
    if dataset.data.dtype == np.uint8:
        dataset.data = dataset.data / 255.0 * 2.0 - 1.0

In [6]:
# Prepare both splits for scikit-learn: labels become a NumPy array and every
# image is flattened into a single feature row (one row per sample).
for split in (train_dataset, test_dataset):
    split.targets = np.array(split.targets)
    split.data = split.data.reshape(len(split.data), -1)

In [7]:
# Confirm the flattened layout: feature-matrix shape, then label-vector shape.
print(train_dataset.data.shape, train_dataset.targets.shape, sep="\n")

(50000, 3072)
(50000,)


In [14]:
# --- Cross-validation bookkeeping ---
k_folds = 5
nsamples = len(train_dataset.data)   # number of rows in the training matrix
fold_size = nsamples // k_folds      # whole-number size of one fold
indices = np.arange(nsamples)        # 0 .. nsamples - 1

# --- Model search space ---
n_estimators = 10                    # base SVCs per bagging ensemble
regularizers = [1.0, 0.7]            # candidate values for SVC's C
kernels = ["rbf", "linear"]          # candidate SVC kernels

# --- Per-class weights ("balanced" heuristic) ---
# Not passed to any estimator in the cells shown here.
class_labels = np.unique(train_dataset.targets)
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=class_labels,
    y=train_dataset.targets,
)

In [15]:
# Train one bagged, one-vs-rest SVM for every (C, kernel) combination and keep
# each fitted model in `models`, keyed by its (regularizer, kernel) pair.
models = {}
bagging_seed = 42  # fixed seed so the sampled training subsets are reproducible

for regularizer in regularizers:
    for kernel in kernels:

        svcmodel = OneVsRestClassifier(
            BaggingClassifier(
                SVC(C=regularizer, kernel=kernel),
                # Each base SVC is fitted on a 1 / n_estimators fraction of
                # the rows, which keeps the individual SVM fits small.
                max_samples=1.0 / n_estimators,
                n_estimators=n_estimators,
                # The base SVCs are independent, so fit them on all cores.
                n_jobs=-1,
                random_state=bagging_seed,
            )
        )

        # fit() takes the targets as they are; wrapping them in np.array
        # again only produced a redundant copy.
        svcmodel.fit(train_dataset.data, train_dataset.targets)
        models[(regularizer, kernel)] = svcmodel
        

In [None]:
# Predict with the configuration that the training loop fitted last. It is
# looked up explicitly in `models` so this cell no longer depends on the
# `svcmodel` variable left behind by the loop.
final_config = (regularizers[-1], kernels[-1])
y_pred = models[final_config].predict(test_dataset.data)

In [None]:
# Share of test samples whose predicted label equals the true label.
accuracy_score(y_true=test_dataset.targets, y_pred=y_pred)

0.5529

TODO:
1. Use a bagging classifier to train each SVC on a subset of the dataset in parallel
2. Implement k-fold cross-validation
3. One-hot encoding
4. PCA