In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import idx2numpy

# Locations of the four Fashion-MNIST idx files (train/test images and labels)
train_images_path = '/mnt/data/train-images-idx3-ubyte'
train_labels_path = '/mnt/data/train-labels-idx1-ubyte'
test_images_path = '/mnt/data/t10k-images-idx3-ubyte'
test_labels_path = '/mnt/data/t10k-labels-idx1-ubyte'

# Read each split as an (images, labels) pair, training split first
x_train, y_train = (
    idx2numpy.convert_from_file(train_images_path),
    idx2numpy.convert_from_file(train_labels_path),
)
x_test, y_test = (
    idx2numpy.convert_from_file(test_images_path),
    idx2numpy.convert_from_file(test_labels_path),
)

# Collapse every axis after the first so each sample becomes a single row
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

# Standardize features: statistics are learned from the training rows only
# and then applied unchanged to both splits
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Grid-search an SVC for one kernel and report its accuracy on the test split
def train_svm(kernel, param_grid, cv=3, verbose=2, n_jobs=-1):
    """Tune an SVC with the given kernel by grid search and score it on the test set.

    The data is NOT passed in: the function reads the notebook-level globals
    ``x_train``, ``y_train``, ``x_test`` and ``y_test``, so the cell that loads
    and scales the data must have been executed first.

    Parameters
    ----------
    kernel : str
        Kernel name forwarded to ``SVC(kernel=...)``.
    param_grid : dict or list of dict
        Candidate hyperparameter values forwarded to ``GridSearchCV``.
    cv : int, default 3
        Cross-validation setting forwarded to ``GridSearchCV``.
    verbose : int, default 2
        Verbosity level forwarded to ``GridSearchCV``.
    n_jobs : int, default -1
        Parallelism setting forwarded to ``GridSearchCV``.

    Returns
    -------
    tuple
        ``(best_params, accuracy)``: the winning parameter combination and the
        accuracy of the corresponding estimator on ``x_test`` / ``y_test``.
    """
    search = GridSearchCV(
        SVC(kernel=kernel),
        param_grid,
        cv=cv,
        verbose=verbose,
        n_jobs=n_jobs,
    )
    search.fit(x_train, y_train)
    best_params = search.best_params_

    # The test split is only touched here, after model selection has finished.
    predictions = search.best_estimator_.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)

    print(f"Best parameters for {kernel} SVM:", best_params)
    print(f"Accuracy on test set: {accuracy:.4f}")
    return best_params, accuracy

# Hyperparameter grids, one per kernel family
linear_params = {
    'C': [0.01, 0.1, 1, 10, 100],
}
poly_params = {
    'C': [0.1, 1, 10],
    'degree': [2, 3, 4],
}
rbf_params = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto', 0.01, 0.1, 1],
}

# Run the searches in order: linear, polynomial, RBF
train_svm('linear', linear_params)
train_svm('poly', poly_params)
train_svm('rbf', rbf_params)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Read each CSV split, then discard duplicate rows and rows with missing values
train_df = pd.read_csv('train.csv').drop_duplicates().dropna()
test_df = pd.read_csv('test.csv').drop_duplicates().dropna()

# The 'label' column is the target; every remaining column is a feature
# NOTE(review): this requires test.csv to contain a 'label' column too — confirm
y_train, y_test = train_df['label'], test_df['label']
x_train = train_df.drop(columns=['label'])
x_test = test_df.drop(columns=['label'])

# Standardize features: statistics are learned from the training rows only
# and then applied unchanged to both splits
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Grid-search an SVC for one kernel and report its accuracy on the test split
def train_svm(kernel, param_grid, cv=3, verbose=2, n_jobs=-1):
    """Tune an SVC with the given kernel by grid search and score it on the test set.

    The data is NOT passed in: the function reads the notebook-level globals
    ``x_train``, ``y_train``, ``x_test`` and ``y_test``, so the cell that loads
    and scales the data must have been executed first.

    Parameters
    ----------
    kernel : str
        Kernel name forwarded to ``SVC(kernel=...)``.
    param_grid : dict or list of dict
        Candidate hyperparameter values forwarded to ``GridSearchCV``.
    cv : int, default 3
        Cross-validation setting forwarded to ``GridSearchCV``.
    verbose : int, default 2
        Verbosity level forwarded to ``GridSearchCV``.
    n_jobs : int, default -1
        Parallelism setting forwarded to ``GridSearchCV``.

    Returns
    -------
    tuple
        ``(best_params, accuracy)``: the winning parameter combination and the
        accuracy of the corresponding estimator on ``x_test`` / ``y_test``.
    """
    search = GridSearchCV(
        SVC(kernel=kernel),
        param_grid,
        cv=cv,
        verbose=verbose,
        n_jobs=n_jobs,
    )
    search.fit(x_train, y_train)
    best_params = search.best_params_

    # The test split is only touched here, after model selection has finished.
    predictions = search.best_estimator_.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)

    print(f"Best parameters for {kernel} SVM:", best_params)
    print(f"Accuracy on test set: {accuracy:.4f}")
    return best_params, accuracy

# Hyperparameter grids, one per kernel family
linear_params = {
    'C': [0.01, 0.1, 1, 10, 100],
}
poly_params = {
    'C': [0.1, 1, 10],
    'degree': [2, 3, 4],
}
rbf_params = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto', 0.01, 0.1, 1],
}

# Run the searches in order: linear, polynomial, RBF
train_svm('linear', linear_params)
train_svm('poly', poly_params)
train_svm('rbf', rbf_params)

Fitting 3 folds for each of 5 candidates, totalling 15 fits
