In [1]:
import numpy as np
import gzip
import time
import warnings
warnings.filterwarnings('ignore')

from sklearn import svm, metrics

def findAccuracy(predicted, actual):
    """Return the percentage of predictions that equal the true labels.

    Parameters
    ----------
    predicted : array-like
        Predicted labels, one per sample.
    actual : array-like
        Ground-truth labels; must hold as many entries as ``predicted``.

    Returns
    -------
    float
        Share of matching positions on a 0-100 scale. ``0.0`` is returned
        when there are no samples, instead of dividing by zero.

    Raises
    ------
    ValueError
        If the two inputs contain a different number of labels.
    """
    # Accept lists/tuples as well as arrays, and compare flat label vectors.
    predicted = np.asarray(predicted).ravel()
    actual = np.asarray(actual).ravel()

    if predicted.shape != actual.shape:
        raise ValueError(
            "predicted and actual must have the same number of labels "
            "(got %d and %d)" % (predicted.shape[0], actual.shape[0])
        )

    total = predicted.shape[0]
    if total == 0:
        # Nothing to score; avoid ZeroDivisionError.
        return 0.0

    # Vectorised element-wise comparison replaces the Python-level loop.
    matches = int(np.count_nonzero(predicted == actual))
    return (matches * 100) / total


# Wall-clock reference point; later cells report elapsed time as time.time() - t.
t = time.time()

In [2]:
print("############ Part 1 - Collect the Datasets ##########")

# Image geometry and sample counts for the gzip-compressed files read below.
img_dim = 28
flat_size = img_dim*img_dim   # number of bytes in one flattened image
n_img = 60000
n_img_test = 10000

# ---- Training set -------------------------------------------------------
print("\n\nReading the training data.")
# Each handle is opened in a `with` block so it is closed as soon as its
# payload has been read (the handles were previously left open).
with gzip.open('train-images-idx3-ubyte.gz', 'rb') as stats_train:
    stats_train.read(16)  # discard the first 16 bytes (file header) before the pixel bytes
    buf = stats_train.read(flat_size * n_img)

data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
x_train = data.reshape(n_img, flat_size)  # one row per image

with gzip.open('train-labels-idx1-ubyte.gz', 'rb') as stats_labels:
    stats_labels.read(8)  # discard the first 8 bytes (file header) before the label bytes
    buf_labels = stats_labels.read(n_img)  # one byte per label

y_train = np.frombuffer(buf_labels, dtype=np.uint8).astype(np.int64)
# Reported only after the bytes have actually been read and decoded.
print("\nTraining Data read successfully.")

# ---- Test set -----------------------------------------------------------
print("\n\nReading the testing data.")
with gzip.open('t10k-images-idx3-ubyte.gz', 'rb') as stats_test:
    stats_test.read(16)
    buf_test = stats_test.read(flat_size * n_img_test)

data_test = np.frombuffer(buf_test, dtype=np.uint8).astype(np.float32)
x_test = data_test.reshape(n_img_test, flat_size)

with gzip.open('t10k-labels-idx1-ubyte.gz', 'rb') as stats_test_labels:
    stats_test_labels.read(8)
    buf_test_labels = stats_test_labels.read(n_img_test)

y_test = np.frombuffer(buf_test_labels, dtype=np.uint8).astype(np.int64)
print("\nTesting Data read successfully.")

print("\n\n############ Part 1 Completed ##########")


############ Part 1 - Collect the Datasets ##########


Reading the training data.

Training Data read successfully.


Reading the testing data.

Testing Data read successfully.


############ Part 1 Completed ##########


In [None]:
print("############ Part 2 - Developing the model classifier #################")

# Initialize the linear SVC classifier.
# random_state is pinned so repeated runs of this cell give the same model
# whenever the underlying solver shuffles the data.
classifier = svm.LinearSVC(random_state=0)


## Train the classifier on training set
classifier.fit(x_train, y_train)

## Perform prediction by the classifier
pred_results = classifier.predict(x_test)

# Elapsed time is measured from `t`, which is set in the first cell, so it
# covers everything executed since then, not only fit/predict above.
print("\n\nTIME  = ", time.time() - t)

# Percentage of test samples whose predicted label matches the true label.
accuracy = findAccuracy(pred_results, y_test)

print("\n\nAccuracy = ", accuracy)

print("############ Part 2 Completed ################")


############ Part 2 - Developing the model classifier #################


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

print("############ Part 3 - Performing cross-validation and hyper-parameter tuning ###############")

# Pipeline: standardise the features, then fit a polynomial-kernel SVC.
steps = [('scaler', StandardScaler()), ('SVM', SVC(kernel='poly'))]
pipeline = Pipeline(steps)

# Search grid; the 'SVM__' prefix routes each parameter to the 'SVM' pipeline step.
C_params = [0.001, 0.1, 0.5, 10, 100]
gamma_params = [0.01, 0.1, 1, 5, 10]
parameters = {'SVM__C':C_params, 'SVM__gamma':gamma_params}

classifier = GridSearchCV(pipeline, param_grid=parameters, cv=5)

# Time only the grid search itself. Measuring from the notebook-level `t`
# would also count data loading and the Part 2 training run.
tuning_start = time.time()
classifier.fit(x_train, y_train)
tuning_elapsed = time.time() - tuning_start

y_pred = classifier.predict(x_test)

print("\n\nTime taken for hyper-parameter tuning : ", tuning_elapsed)
print ("\n\nBest parameters found by GridSearchCV: ", classifier.best_params_)
# Apply the format with `%`; passing the value as a second print() argument
# printed the literal "%3.5f" followed by the unformatted score.
print("\n\nScore  = %3.5f" % classifier.score(x_test, y_test))


accuracy = findAccuracy(y_pred, y_test)

print("\n\nAccuracy calculated by self-defined function = ", accuracy)
print("############ Part 3 - Completed ###############")

############ Part 3 - Performing cross-validation and hyper-parameter tuning ###############
