In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# 1️⃣ Load Fashion MNIST Dataset
# load_data() returns a (train, test) pair; each element is an (images, labels) pair.
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
29515/29515 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26421880/26421880 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
5148/5148 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
4422102/4422102 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step


In [2]:
# Collapse every image into one row vector (28x28 → 784), keeping one row per sample.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

# Standardize the features: the scaler's statistics are learned from the
# training rows only and then applied unchanged to both splits.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [3]:
# 2️⃣ Reduce Dataset Size to 10,000 samples
# Draw a class-stratified, seeded subset of the training data; the remainder
# of the split is discarded via the `_` placeholders.
subset_size = 10000
x_train_small, _, y_train_small, _ = train_test_split(
    x_train,
    y_train,
    train_size=subset_size,
    stratify=y_train,
    random_state=42,
)

In [4]:
# 3️⃣ Apply PCA (Reduce from 784 → 100 features)
# The projection is fitted on the training subset only and then reused for the
# test set, so no test information enters the components.
# random_state is fixed because, depending on the scikit-learn version and the
# data shape, svd_solver='auto' may select the randomized solver, whose output
# (and therefore every downstream SVM score) would otherwise vary between runs.
pca = PCA(n_components=100, random_state=42)
x_train_pca = pca.fit_transform(x_train_small)
x_test_pca = pca.transform(x_test)

In [5]:
# ------------------ Train SVM Models ------------------

# 4️⃣ SVM with Linear Kernel (Find Best C)
# RandomizedSearchCV draws n_iter=3 of the 4 candidate C values at random.
# The search is seeded so the same candidates are tried on every run;
# unseeded, a different C could be skipped each time and the reported
# "best C" / test accuracy would not be reproducible.
param_grid_linear = {'C': [0.1, 1, 10, 100]}
linear_svm = RandomizedSearchCV(
    SVC(kernel='linear'),
    param_distributions=param_grid_linear,
    n_iter=3,
    cv=2,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
)
linear_svm.fit(x_train_pca, y_train_small)

print(f"Best C (Linear SVM): {linear_svm.best_params_['C']}")
# Score the best configuration found by the search on the held-out test set.
y_pred = linear_svm.best_estimator_.predict(x_test_pca)
print(f"Test Accuracy (Linear SVM): {accuracy_score(y_test, y_pred):.4f}")

Best C (Linear SVM): 0.1
Test Accuracy (Linear SVM): 0.8346


In [6]:
# 5️⃣ SVM with Polynomial Kernel (Find Best C & Degree)
# Only n_iter=5 of the 12 (C, degree) combinations are sampled, so the search
# is seeded: without random_state the sampled combinations, and hence the
# reported best parameters and accuracy, change from run to run.
param_grid_poly = {'C': [0.1, 1, 10, 100], 'degree': [2, 3, 4]}
poly_svm = RandomizedSearchCV(
    SVC(kernel='poly'),
    param_distributions=param_grid_poly,
    n_iter=5,
    cv=2,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
)
poly_svm.fit(x_train_pca, y_train_small)

print(f"Best Params (Polynomial SVM): {poly_svm.best_params_}")
# Score the best configuration found by the search on the held-out test set.
y_pred = poly_svm.best_estimator_.predict(x_test_pca)
print(f"Test Accuracy (Polynomial SVM): {accuracy_score(y_test, y_pred):.4f}")

Best Params (Polynomial SVM): {'degree': 3, 'C': 10}
Test Accuracy (Polynomial SVM): 0.8544


In [7]:
# 6️⃣ SVM with RBF Kernel (Find Best C & Gamma)
# Only n_iter=5 of the 16 (C, gamma) combinations are sampled, so the search
# is seeded: without random_state the sampled combinations, and hence the
# reported best parameters and accuracy, change from run to run.
param_grid_rbf = {'C': [0.1, 1, 10, 100], 'gamma': [0.001, 0.01, 0.1, 1]}
rbf_svm = RandomizedSearchCV(
    SVC(kernel='rbf'),
    param_distributions=param_grid_rbf,
    n_iter=5,
    cv=2,
    scoring='accuracy',
    n_jobs=-1,
    random_state=42,
)
rbf_svm.fit(x_train_pca, y_train_small)

print(f"Best Params (RBF SVM): {rbf_svm.best_params_}")
# Score the best configuration found by the search on the held-out test set.
y_pred = rbf_svm.best_estimator_.predict(x_test_pca)
print(f"Test Accuracy (RBF SVM): {accuracy_score(y_test, y_pred):.4f}")

Best Params (RBF SVM): {'gamma': 0.01, 'C': 1}
Test Accuracy (RBF SVM): 0.8073
