# Building and training a KNN model on a limited MNIST dataset

In [0]:
# Reload the autoreload extension and set it to mode 2, so edited modules are
# re-imported automatically before each cell executes.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [0]:
!pip install scikit-learn

In [16]:
import pickle
from google.colab import drive

# Locations on the mounted Drive; base_dir is the folder the pickled model
# is written to and read back from further down.
root_dir = "/content/gdrive/My Drive/"
base_dir = root_dir + 'fastai-v3/'

# Attach Google Drive to the Colab filesystem, forcing a fresh mount.
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.utils import check_random_state
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Number of training samples and the seed for the train/test split.
train_samples, seed = 450, 8080

# as_frame=False forces NumPy arrays. Current scikit-learn releases return a
# pandas DataFrame/Series from fetch_openml by default, on which the
# positional indexing and the reshape below would fail.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

# Shuffle samples and labels with one fixed permutation so they stay aligned.
random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
X = X[permutation]
y = y[permutation]
# Ensure one flat feature row per sample.
X = X.reshape((X.shape[0], -1))

# Keep only a small subset: `train_samples` rows for training, 50 for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=train_samples, test_size=50, random_state=seed)

# Fit the scaler on the training data only, then apply the same transform to
# the test data so no test statistics leak into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# k-nearest-neighbours classifier with scikit-learn's default hyperparameters.
clf = KNeighborsClassifier()

print("Training model...")
clf.fit(X_train, y_train)

print("Performing predictions...")
predicted = clf.predict(X_test)

print("Accuracy: {}".format(accuracy_score(y_test, predicted)))

Training model...
Performing predictions...
Accuracy: 0.74


In [0]:
# Persist the fitted classifier to Drive. The context manager guarantees the
# file is flushed and closed even if pickling raises.
with open(base_dir+'KNearest_onfido.sav', 'wb') as model_file:
    pickle.dump(clf, model_file)

In [0]:
# Reload the pickled classifier and check that it still scores on the test set.
# NOTE: pickle.load can execute arbitrary code; only load files you created
# yourself or otherwise trust.
with open(base_dir+'KNearest_onfido.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
loaded_model.score(X_test,y_test)