# Building a linear SVM on a limited MNIST dataset

In [0]:
# (Re)load IPython's autoreload extension so edited modules can be re-imported
# without restarting the kernel.
%reload_ext autoreload
# Mode 2: reload every imported module before each cell is executed.
%autoreload 2
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In [0]:
!pip install scikit-learn

In [9]:
import pickle
from google.colab import drive

# Mount Google Drive inside the Colab VM (forcing a fresh mount) and derive the
# working directories used by the rest of the notebook from the mount point.
mount_point = '/content/gdrive'
drive.mount(mount_point, force_remount=True)
root_dir = f"{mount_point}/My Drive/"
base_dir = f"{root_dir}fastai-v3/"

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [79]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.utils import check_random_state
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Size of the training subset and the seed shared by the split and the model.
train_samples,seed = 450, 8080

# as_frame=False makes fetch_openml return NumPy arrays. From scikit-learn 0.24
# the default ('auto') returns pandas objects for this dataset, on which the
# positional indexing and reshape below fail.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

# Shuffle samples and labels together with one fixed permutation.
random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
# Keep exactly one flat feature row per sample.
X = X.reshape((X.shape[0], -1))

# Small train split plus a 50-sample held-out test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=50, random_state=seed)

# Fit the scaling statistics on the training rows only and reuse them for the
# test rows, so no information from the test split leaks into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# probability=True enables predict_proba; its calibration step shuffles the
# data internally, so random_state is pinned to keep the probabilities
# reproducible between runs.
clf = SVC(kernel="linear", C=0.05, probability=True, random_state=seed)

print("Training model...")
clf.fit(X_train, y_train)

print("Performing predictions...")
predicted = clf.predict(X_test)

print("Accuracy: {}".format(accuracy_score(y_test, predicted)))

Training model...
Performing predictions...
Accuracy: 0.82


In [0]:
# Persist the fitted classifier to Drive. The `with` block guarantees the file
# is flushed and closed even if pickling fails part-way.
# Only `clf` is stored: the fitted `scaler` is not part of this file, so any
# consumer has to apply the same feature scaling itself.
with open(base_dir+'SVM_onfido.sav','wb') as model_file:
    pickle.dump(clf, model_file)

In [0]:
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load model files that you created yourself.
with open(base_dir+'SVM_onfido.sav','rb') as model_file:
    loaded_model = pickle.load(model_file)
# Round-trip check: mean accuracy of the reloaded model on the held-out split,
# shown as the cell output.
loaded_model.score(X_test,y_test)

In [0]:
#@title Testing prediction upon single image
from PIL import Image
import numpy as np
from sklearn.preprocessing import scale

# Load the sample as 8-bit grayscale, resize it to 28x28 and flatten it into a
# single feature row. Image.LANCZOS is the same filter as the Image.ANTIALIAS
# alias, which was removed in Pillow 10. The `with` block closes the file.
with Image.open(base_dir+"sample_digit.png") as sample_image:
    img = np.array(sample_image.convert("L").resize((28,28), Image.LANCZOS)).flatten()
img = img.reshape(1,-1)
# Apply the per-feature statistics fitted on the training set. Standardising
# across the pixels of this one image (scale(img, axis=1)) is a different
# transform from the one the classifier was trained on.
img = scaler.transform(img)
# Column i of predict_proba belongs to loaded_model.classes_[i]; reading the
# labels from the model keeps them aligned even if a digit was absent from the
# small training subset. MNIST labels are digit strings, converted to int here.
classes = [int(label) for label in loaded_model.classes_]
# Despite the name, these are per-class probabilities, not losses.
losses = loaded_model.predict_proba(img).flatten()
# (digit, probability) pairs, most likely digit first.
predictions = sorted(
    zip(classes, map(float, losses)),
    key=lambda p: p[1],
    reverse=True
)
print(predictions)