In [1]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
import joblib

#1. load dataset
digits = load_digits()
#print(digits.data.shape)

X = digits.data
y = digits.target

#2. clean the data - no need to clean since it is from sklearn lib
#digits.feature_names [pixel_0_0, pixel_0_1...pixel_7_7]
#digits.target_names [0,1,2,3,4,5,6,7,8,9]
#y.shape 1797

#3. split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35)

#4. create and train the model
knc = KNeighborsClassifier(n_neighbors=4)
knc.fit(X_train, y_train)

#5. predict the result of the test dataset and then compare 
#how our maodel predicted against the known y_test dataset
y_pred = knc.predict(X_test)
acc = metrics.accuracy_score(y_test, y_pred)
print(f'Model accuracy: {acc}')

# if (acc > 0.98):
#     # save the model so we don't have to repeat the resource intensive training
#     model_name = 'ml_digits_model_test35_8knn.joblib'
#     joblib.dump(knc, model_name)
#     print(f'Model persistance successfull: {model_name}')



Model accuracy: 0.9809220985691574


In [2]:
# function to convert one image file to vector
import numpy as np
from PIL import Image
from PIL import ImageOps as imgops
import joblib
from sklearn.datasets import load_digits
import glob

def get_sample_vector():

    try:
        #read the sample.png image 
        img = imgops.grayscale(Image.open('.\\images\\sample7.png'))
        img.show()
    except:
        print('sample.png not found! Expect bad things!')
        return []
    
    #convert to the img to a vector
    img88 = img.resize((8,8))
    arr88 = np.array(img88)
    flat_arr88 = arr88.ravel()
    return flat_arr88



###Function to convert ALL the images in the images subfolder to 1D vector

In [3]:

def get_image_vector_dict():
    file_list = glob.glob("images\\*.png")

    img_vector_dict = {}

    for f in file_list:
        try:
            #read the sample.png image 
            img = imgops.grayscale(Image.open(f))
        except:
            print(f'ERROR! Unable to open {f}')

        #convert the img to a vector
        img88 = img.resize((8,8))
        arr88 = np.array(img88)
        flat_arr88 = arr88.ravel()
        #print(flat_arr88)
        #print(flat_arr88.shape)
        img_vector_dict[f] = flat_arr88

    #print(img_vector_dict)
    return img_vector_dict


In [8]:
def extract_digit(file_name):
    a = file_name.split("\\")
    b = a[1].split('.')
    return (b[0])[-1]


def validate_predictions(predictions):
    print('predictions:', predictions)
    print('|Expected|Predicted|Pass/Fail|')
    for f_nm, pred in predictions.items():
        expected = extract_digit(f_nm)
        pf = 'PASS' if int(expected) == int(pred) else 'FAIL'
        print(f'|{expected}|{pred}|{pf}')



In [9]:
# Using the model.....
# import numpy as np
# from PIL import Image
# from PIL import ImageOps as imgops
# import joblib
# from sklearn.datasets import load_digits
# import glob


# load the sample
img_dict = get_image_vector_dict()
#print(len(sample))
if len(img_dict) < 1 or len(img_dict.values()) < 1:
    print('Sample image error!')
    exit(404)

# load digits dataset so we can look up our predicted target
#digits = load_digits() - loaded earlier in the notebook 

predictions = {}
# loop thru the vectors and run them through the prediction model 
for img_nm, smpl in img_dict.items():
    p = knc.predict([smpl])
    predictions[img_nm] = digits.target_names[p[0]]

validate_predictions(predictions)
#print('predictions:', predictions)
print('The End!')


predictions: {'images\\sample1.png': 0, 'images\\sample2.png': 1, 'images\\sample3.png': 9, 'images\\sample5.png': 9, 'images\\sample6.png': 9, 'images\\sample7.png': 0, 'images\\sample8.png': 9, 'images\\sample9.png': 1}
|Expected|Predicted|Pass/Fail|
|1|0|FAIL
|2|1|FAIL
|3|9|FAIL
|5|9|FAIL
|6|9|FAIL
|7|0|FAIL
|8|9|FAIL
|9|1|FAIL
The End!
