In [16]:
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
import time

In [17]:
from tensorflow.keras.datasets import mnist

# mnist.load_data() returns two (x, y) pairs; unpack them one split at a time.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [18]:
def filter_digits(x, y, digits):
    """Select the samples whose label appears in ``digits``.

    Args:
        x: Array of samples, indexed along its first axis by a boolean mask
            built from ``y`` (assumes ``x`` and ``y`` are aligned NumPy
            arrays of equal length -- confirm against the caller).
        y: Array of labels, one per sample.
        digits: Collection of label values to keep.

    Returns:
        A ``(x_kept, y_kept)`` tuple containing only the entries of ``x`` and
        ``y`` at positions where ``y`` is one of ``digits``.
    """
    keep = np.isin(y, digits)
    x_kept = x[keep]
    y_kept = y[keep]
    return x_kept, y_kept

# Only these four digit classes are kept for the classification task.
digits_of_interest = [2, 3, 8, 9]

# Apply the same label filter to both splits.
(x_train, y_train), (x_test, y_test) = (
    filter_digits(x_train, y_train, digits_of_interest),
    filter_digits(x_test, y_test, digits_of_interest),
)


In [19]:
def _flatten_and_scale(images):
    """Flatten images to 784-element rows and scale pixels by 1/255.

    The division by 255 is applied only while the data is still
    integer-typed. Once a previous run of this cell has converted the array
    to float32, calling this again leaves the values unchanged, so the cell
    can be re-executed without shrinking the features a second time.

    Args:
        images: NumPy array whose total size is a multiple of 784 (assumes
            raw pixel data is integer-typed with values in 0-255, as the
            Keras MNIST loader documents -- confirm if the source changes).

    Returns:
        A float32 array of shape ``(-1, 784)``.
    """
    flat = images.reshape(-1, 784)
    if np.issubdtype(flat.dtype, np.integer):
        # Raw integer pixels: convert and normalise.
        return flat.astype(np.float32) / 255
    # Already floating point, i.e. scaled by an earlier run of this cell.
    return flat.astype(np.float32)


x_train = _flatten_and_scale(x_train)
x_test = _flatten_and_scale(x_test)


In [20]:
from sklearn.model_selection import GridSearchCV

# Base estimator. The C given here is only a placeholder: the grid below
# sets C for every fit performed by the search.
svm_model = LinearSVC(C=1.0, dual=False, max_iter=10000)

# Candidate values for the regularisation parameter C.
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100]
}
# 5-fold cross-validated search, ranking candidates by accuracy.
grid_search = GridSearchCV(svm_model, param_grid, cv=5, scoring='accuracy')

# Time the whole search (all cross-validation fits, plus whatever refit the
# search performs). perf_counter() is used instead of time.time() because it
# is a monotonic, high-resolution clock: the measured interval cannot be
# distorted by system clock adjustments. Note that start_time/end_time are
# therefore only meaningful as a difference, not as timestamps.
start_time = time.perf_counter()

# Fit grid search
grid_search.fit(x_train, y_train)

end_time = time.perf_counter()
training_time = end_time - start_time

In [21]:
# Take the estimator selected by the search and report its parameters.
best_model = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

# Predict on both splits with the selected model.
train_predictions = best_model.predict(x_train)
test_predictions = best_model.predict(x_test)

# Accuracy on the data the search was fitted on vs. the held-out test split.
train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)

# Summary report.
print(f"Training Accuracy with best C: {train_accuracy:.3f}")
print(f"Test Accuracy with best C: {test_accuracy:.3f}")
print(f"Training time: {training_time:.2f} seconds")


Best parameters: {'C': 0.01}
Training Accuracy with best C: 0.948
Test Accuracy with best C: 0.945
Training time: 131.75 seconds
