In [None]:
# Nearest centroid model with PCA

# Import libraries
import keras
from keras.datasets import mnist
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.neighbors import NearestCentroid

In [None]:
# Prepare MNIST features for the nearest centroid model:
# flatten -> standardize -> PCA projection.
# Import libraries
import keras
from keras.datasets import mnist
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Import datasets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Flatten every image into a single 784-long row and divide pixel values by 255.
x_train = x_train.reshape(-1, 28*28)/255.0
x_test = x_test.reshape(-1, 28*28)/255.0

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Standardization
scaler = StandardScaler()
# Fit on training set only.
scaler.fit(x_train)
# Apply transform to both the training set and the test set.
train_img = scaler.transform(x_train)
test_img = scaler.transform(x_test)

# PCA Transformation
# A float n_components in (0, 1) asks PCA to keep as many components as are
# needed to reach that fraction of explained variance.
pca = PCA(0.95)
pca.fit(train_img)
# PCA was fitted on the standardized features, so the same standardized arrays
# must be projected; feeding the unscaled x_train/x_test here would apply the
# components to data in a different feature space.
x_train_pca = pca.transform(train_img)
x_test_pca = pca.transform(test_img)

print("original shape:   ", x_train.shape)
print("transformed shape:", x_train_pca.shape)
print("original shape:   ", x_test.shape)
print("transformed shape:", x_test_pca.shape)

(60000, 784)
(60000,)
(10000, 784)
(10000,)
original shape:    (60000, 784)
transformed shape: (60000, 331)
original shape:    (10000, 784)
transformed shape: (10000, 331)


In [None]:
# Compare NearestCentroid distance metrics with repeated stratified k-fold
# cross-validation on the PCA-reduced training data.
metric_name = ['euclidean', 'manhattan']
res_dict = {}

# The splitter is seeded with a fixed integer, so a single instance is shared
# by every metric instead of being rebuilt on each iteration.
cv = RepeatedStratifiedKFold(n_splits=60, n_repeats=5, random_state=1)

for metric in metric_name:
  # Classifier under evaluation for this metric.
  model = NearestCentroid(metric=metric)

  # One accuracy value per (repeat, fold) pair; folds run in parallel.
  scores = cross_val_score(model, x_train_pca, y_train, scoring='accuracy', cv=cv, n_jobs=-1)

  name = metric
  mean_accuracy = mean(scores)

  # Report mean accuracy with its standard deviation in parentheses.
  print("Name: " + name)
  print('Mean Accuracy: %.3f (%.3f)' % (mean_accuracy, std(scores)))

  # Keep the mean accuracy per metric for later comparison.
  res_dict[name] = mean_accuracy

Name: euclidean
Mean Accuracy: 0.807 (0.012)
Name: manhattan
Mean Accuracy: 0.820 (0.012)


In [None]:
# Fit a nearest centroid model on the full training set and evaluate it on the
# held-out test set.
from sklearn.datasets import make_classification
from sklearn.neighbors import NearestCentroid
from sklearn.metrics import classification_report

# define model
# NOTE(review): this uses the estimator's default metric rather than the
# best-scoring metric recorded in res_dict by the cross-validation cell --
# confirm that is intended.
model = NearestCentroid()

# fit model
model.fit(x_train_pca, y_train)

# make a prediction (computed once and reused for the report below)
y_pred = model.predict(x_test_pca)

# Printing Accuracy on Training and Test sets
print(f"Training Set Score : {model.score(x_train_pca, y_train) * 100} %")
print(f"Test Set Score : {model.score(x_test_pca, y_test) * 100} %")

# Printing classification report of the classifier on the test set data.
# Reuse y_pred instead of running predict() on the test set a second time.
print(f"Model Classification Report : \n{classification_report(y_test, y_pred)}")

Training Set Score : 80.77 %
Test Set Score : 82.07 %
Model Classification Report : 
              precision    recall  f1-score   support

           0       0.91      0.90      0.90       980
           1       0.77      0.96      0.86      1135
           2       0.88      0.76      0.81      1032
           3       0.76      0.81      0.79      1010
           4       0.80      0.83      0.81       982
           5       0.75      0.68      0.72       892
           6       0.88      0.86      0.87       958
           7       0.91      0.83      0.87      1028
           8       0.79      0.74      0.77       974
           9       0.77      0.81      0.79      1009

    accuracy                           0.82     10000
   macro avg       0.82      0.82      0.82     10000
weighted avg       0.82      0.82      0.82     10000

