In [15]:
from sklearn.datasets import load_digits

In [16]:
# Load the digits dataset bundled with scikit-learn; the cells below use
# `digits.data` as the feature matrix and `digits.target` as the labels.
digits = load_digits()

In [17]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.20,
    random_state=11,
)


In [18]:
# Sanity check: (rows, features) of the training portion of the split.
X_train.shape

(1437, 64)

In [19]:
# Sanity check: (rows, features) of the held-out test portion (test_size=0.20).
X_test.shape

(360, 64)

In [20]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

In [21]:
# Candidate classifiers keyed by their class name. Insertion order is preserved,
# so anything iterating over this dict sees KNN, then SVC, then GaussianNB.
estimators = {
    type(classifier).__name__: classifier
    for classifier in (
        KNeighborsClassifier(),
        SVC(gamma='scale'),
        GaussianNB(),
    )
}

In [22]:
from sklearn.model_selection import KFold

In [23]:
# Shuffled 10-fold splitter; the fixed integer seed makes the fold assignment repeatable.
kfold = KFold(
    n_splits=10,
    shuffle=True,
    random_state=11,
)

In [24]:
from sklearn.model_selection import cross_val_score

In [25]:
# Score every candidate classifier with 10-fold cross-validation on the full dataset.
#
# The `kfold` splitter created earlier in the notebook is reused rather than being
# rebuilt on each iteration: it is loop-invariant, and because it was constructed
# with an integer random_state it yields the same folds every time it is used, so
# all estimators are evaluated on identical train/validation partitions.
for estimator_name, estimator_object in estimators.items():
    scores = cross_val_score(
        estimator=estimator_object, X=digits.data, y=digits.target, cv=kfold)

    # Right-align the name in a 20-character column so the results line up.
    print(f'{estimator_name:>20}: '
          f'mean accuracy={scores.mean():.2%}; '
          f'standard deviation={scores.std():.2%}')

KNeighborsClassifier: mean accuracy=98.72%; standard deviation=0.75%
                 SVC: mean accuracy=98.72%; standard deviation=0.79%
          GaussianNB: mean accuracy=84.48%; standard deviation=3.47%


In [26]:
# Try more classifiers and replicate the above steps:


from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [27]:
# Decision tree: fit on the training split, then evaluate on the held-out test split.
#
# random_state is fixed to 11 (the seed already used for the train/test split and
# the k-fold splitter) because DecisionTreeClassifier permutes features randomly
# when searching for splits; without a seed, ties between equally good splits are
# broken differently on each run and the reported accuracy drifts.
model = DecisionTreeClassifier(random_state=11)
model.fit(X_train, y_train)
print("Decision Tree")
print("Accuracy: ", model.score(X_test, y_test))

predictions = model.predict(X_test)

# Per-class precision, recall and F1 on the test split.
print(classification_report(y_test, predictions))

Decision Tree
Accuracy:  0.85
              precision    recall  f1-score   support

           0       1.00      0.87      0.93        38
           1       0.86      0.84      0.85        37
           2       0.94      0.82      0.88        39
           3       0.82      0.78      0.80        41
           4       0.88      0.85      0.86        41
           5       0.84      0.96      0.90        27
           6       0.94      1.00      0.97        30
           7       0.85      0.78      0.81        36
           8       0.72      0.85      0.78        34
           9       0.71      0.81      0.76        37

    accuracy                           0.85       360
   macro avg       0.86      0.86      0.85       360
weighted avg       0.86      0.85      0.85       360



In [28]:
# Random forest: fit on the training split, then evaluate on the held-out test split.
#
# random_state is fixed to 11 (the seed already used for the train/test split and
# the k-fold splitter) because the forest draws bootstrap samples and random
# feature subsets while building its trees; without a seed the reported accuracy
# changes from run to run.
model = RandomForestClassifier(random_state=11)
model.fit(X_train, y_train)
print("Random Forest")
print("Accuracy: ", model.score(X_test, y_test))

predictions = model.predict(X_test)

# Per-class precision, recall and F1 on the test split.
print(classification_report(y_test, predictions))


Random Forest
Accuracy:  0.9722222222222222
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        38
           1       1.00      1.00      1.00        37
           2       1.00      1.00      1.00        39
           3       1.00      0.95      0.97        41
           4       0.97      0.95      0.96        41
           5       0.93      1.00      0.96        27
           6       1.00      0.97      0.98        30
           7       0.88      0.97      0.92        36
           8       0.97      0.97      0.97        34
           9       0.97      0.95      0.96        37

    accuracy                           0.97       360
   macro avg       0.97      0.97      0.97       360
weighted avg       0.97      0.97      0.97       360



In [29]:
# comparison of the classifiers

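# KNeighborsClassifier and SVC tie for the best mean 10-fold cross-validated accuracy
# (98.72%); KNN has the slightly lower standard deviation (0.75% vs. 0.79%).
# Decision Tree and Random Forest reach about 85% and 97% accuracy respectively, so
# Random Forest is also a good classifier while Decision Tree is on par with GaussianNB.
# Note: the two tree-based scores come from a single 80/20 train/test split, so they
# are not directly comparable to the cross-validated scores of the other three models.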