Import required libraries

In [None]:
import numpy as np
import cv2

Define global variables

In [None]:
# --- Contour filtering ---
# Contours whose area is smaller than this are rejected by ContourWithData.checkIfContourIsValid.
MIN_CONTOUR_AREA = 100

# --- Glyph normalisation ---
# Every cropped character is resized to this width x height before it is flattened.
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30

# --- Training sheet layout ---
# Number of rows in a training image; labels are assigned as (contour index // ROWS).
ROWS = 5

# --- Input locations ---
TRAINING_IMAGES_PATH = 'train_images'       # folder with training images
TESTING_IMAGES_PATH = 'test_images'         # folder with testing images
TRAINING_ALPHA = "/".join((TRAINING_IMAGES_PATH, "train_alpha.png"))    # training alphabet image
TRAINING_DIGIT = "/".join((TRAINING_IMAGES_PATH, "train_digit.png"))    # training digit image
TEST_IMAGE = "/".join((TESTING_IMAGES_PATH, "test3.png"))               # test image

Define contour class to store the character contours found in the training and test images

In [None]:
class ContourWithData():
    """A single contour together with its bounding rectangle and area.

    The attributes below are class-level defaults; the code that creates an
    instance is expected to assign ``npaContour``, ``boundingRect`` and
    ``fltArea`` itself and then call
    ``calculateRectTopLeftPointAndWidthAndHeight``.
    """

    npaContour = None       # the contour itself
    boundingRect = None     # bounding rect of the contour as (x, y, width, height)
    intRectX = 0            # x of the bounding rect's top-left corner
    intRectY = 0            # y of the bounding rect's top-left corner
    intRectWidth = 0        # width of the bounding rect
    intRectHeight = 0       # height of the bounding rect
    fltArea = 0.0           # area of the contour

    def calculateRectTopLeftPointAndWidthAndHeight(self):
        """Spread ``boundingRect`` over the four ``intRect*`` attributes."""
        (self.intRectX,
         self.intRectY,
         self.intRectWidth,
         self.intRectHeight) = self.boundingRect

    def checkIfContourIsValid(self):
        """Return False when the contour area is below MIN_CONTOUR_AREA, True otherwise.

        This is deliberately simplistic; a production-grade program would
        need much stricter validity checks.
        """
        isTooSmall = self.fltArea < MIN_CONTOUR_AREA
        return not isTooSmall

### TRAIN MODEL

Open training characters image

In [None]:
# Read both training sheets. cv2.imread does not raise on failure; it returns
# None, so each result has to be checked explicitly.
imgTrainingAlpha = cv2.imread(TRAINING_ALPHA)       # training alphabet image
imgTrainingDigit = cv2.imread(TRAINING_DIGIT)       # training digit image

# Raise instead of calling exit(): exit() is meant for the interactive shell
# and, inside a notebook, shuts the kernel down and discards all state.
# An exception stops "Run All" at this cell and names the offending path.
if imgTrainingAlpha is None:
    raise OSError("error: alpha image not read from file: " + TRAINING_ALPHA)
if imgTrainingDigit is None:
    raise OSError("error: digit image not read from file: " + TRAINING_DIGIT)


Modify training alphabet image

In [None]:
# Pre-process the alphabet sheet: grayscale -> blur -> inverted adaptive threshold.
imgGrayAlpha = cv2.cvtColor(imgTrainingAlpha, cv2.COLOR_BGR2GRAY)
imgBlurredAlpha = cv2.GaussianBlur(imgGrayAlpha, (5, 5), 0)     # 5x5 Gaussian kernel

# Convert the blurred grayscale image to pure black and white.
imgThreshAlpha = cv2.adaptiveThreshold(
    imgBlurredAlpha,                    # input image
    255,                                # pixels that pass the threshold become full white
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,     # gaussian rather than mean, seems to give better results
    cv2.THRESH_BINARY_INV,              # inverted: white foreground on black background
    11,                                 # size of the pixel neighborhood used for the threshold
    2,                                  # constant subtracted from the mean or weighted mean
)

Find separate characters from alphabet image

In [None]:
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in OpenCV 3.x. Slicing the last two elements
# makes the unpacking work on every version.
npaContoursAlpha, _ = cv2.findContours(
    imgThreshAlpha.copy(),          # pass a copy: older OpenCV versions modify the input image
    cv2.RETR_EXTERNAL,              # retrieve the outermost contours only
    cv2.CHAIN_APPROX_SIMPLE,        # compress straight segments down to their end points
)[-2:]

Modify training digit image

In [None]:
# Pre-process the digit sheet: grayscale -> blur -> inverted adaptive threshold.
imgGrayDigit = cv2.cvtColor(imgTrainingDigit, cv2.COLOR_BGR2GRAY)
imgBlurredDigit = cv2.GaussianBlur(imgGrayDigit, (5, 5), 0)     # 5x5 Gaussian kernel

# Convert the blurred grayscale image to pure black and white.
imgThreshDigit = cv2.adaptiveThreshold(
    imgBlurredDigit,                    # input image
    255,                                # pixels that pass the threshold become full white
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,     # gaussian rather than mean, seems to give better results
    cv2.THRESH_BINARY_INV,              # inverted: white foreground on black background
    11,                                 # size of the pixel neighborhood used for the threshold
    2,                                  # constant subtracted from the mean or weighted mean
)

Find separate characters from digit image

In [None]:
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in OpenCV 3.x. Slicing the last two elements
# makes the unpacking work on every version.
npaContoursDigit, _ = cv2.findContours(
    imgThreshDigit.copy(),          # pass a copy: older OpenCV versions modify the input image
    cv2.RETR_EXTERNAL,              # retrieve the outermost contours only
    cv2.CHAIN_APPROX_SIMPLE,        # compress straight segments down to their end points
)[-2:]


Declare empty arrays

In [None]:
# Two independent lists: valid contours from the alphabet sheet and from the digit sheet.
AlphaContours, DigitContours = [], []

Create objects for alphabet contours

In [None]:
# Start from an empty list every time this cell runs. Appending to a list
# created in another cell meant that re-running this cell duplicated every
# contour, which in turn shifted the (index // ROWS) labels assigned later.
AlphaContours = []

for npaContour in npaContoursAlpha:
    # wrap the raw contour together with its bounding rect and area
    contourWithData = ContourWithData()
    contourWithData.npaContour = npaContour
    contourWithData.boundingRect = cv2.boundingRect(npaContour)
    contourWithData.calculateRectTopLeftPointAndWidthAndHeight()
    contourWithData.fltArea = cv2.contourArea(npaContour)

    # keep only contours that pass the validity check
    if contourWithData.checkIfContourIsValid():
        AlphaContours.append(contourWithData)

# sort contours from left to right (by the x of the bounding rect's left edge)
AlphaContours.sort(key=lambda x: x.intRectX)

Create objects for digit contours

In [None]:
# Start from an empty list every time this cell runs. Appending to a list
# created in another cell meant that re-running this cell duplicated every
# contour, which in turn shifted the (index // ROWS) labels assigned later.
DigitContours = []

for npaContour in npaContoursDigit:
    # wrap the raw contour together with its bounding rect and area
    contourWithData = ContourWithData()
    contourWithData.npaContour = npaContour
    contourWithData.boundingRect = cv2.boundingRect(npaContour)
    contourWithData.calculateRectTopLeftPointAndWidthAndHeight()
    contourWithData.fltArea = cv2.contourArea(npaContour)

    # keep only contours that pass the validity check
    if contourWithData.checkIfContourIsValid():
        DigitContours.append(contourWithData)

# sort contours from left to right (by the x of the bounding rect's left edge)
DigitContours.sort(key=lambda x: x.intRectX)

Create empty arrays

In [None]:
# Sample accumulator: zero rows to begin with, one column per pixel of a resized character.
npaFlattenedImages = np.empty(shape=(0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))

# Label accumulator: one character code per row of npaFlattenedImages; written to file at the end.
intClassifications = list()

Define Valid Characters which the model will detect

In [None]:
# Character codes (ord values) of the labels, in the order they are assigned to the training contours.
validAlpha = [ord(letter) for letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]

# Note the digit order: '1' through '9' first, '0' last.
validDigit = [ord(digit) for digit in "1234567890"]

Label alphabet images

In [None]:
index = 0
for npaContour in AlphaContours:
    # every run of ROWS consecutive contours receives the same label
    char = validAlpha[index // ROWS]

    # crop the character out of the thresholded image using its bounding rect
    top = npaContour.intRectY
    left = npaContour.intRectX
    imgROI = imgThreshAlpha[top: top + npaContour.intRectHeight,
                            left: left + npaContour.intRectWidth]

    # bring every character to the same size so all samples have equal length
    imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))

    # record the label for this sample
    intClassifications.append(char)

    # flatten to a single row and stack it under the samples collected so far
    npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
    npaFlattenedImages = np.concatenate((npaFlattenedImages, npaFlattenedImage), axis=0)

    index += 1

Label digit images

In [None]:
index = 0
for npaContour in DigitContours:
    # every run of ROWS consecutive contours receives the same label
    char = validDigit[index // ROWS]

    # crop the character out of the thresholded image using its bounding rect
    top = npaContour.intRectY
    left = npaContour.intRectX
    imgROI = imgThreshDigit[top: top + npaContour.intRectHeight,
                            left: left + npaContour.intRectWidth]

    # bring every character to the same size so all samples have equal length
    imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))

    # record the label for this sample
    intClassifications.append(char)

    # flatten to a single row and stack it under the samples collected so far
    npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
    npaFlattenedImages = np.concatenate((npaFlattenedImages, npaFlattenedImage), axis=0)

    index += 1

Save training data

In [None]:
# labels as float32
fltClassifications = np.array(intClassifications, dtype=np.float32)

# arrange the labels as a single column (N rows x 1 column) before writing them out
npaClassifications = fltClassifications.reshape(-1, 1)

print("\n\ntraining complete !!\n")

# persist labels and flattened samples as text files
for fileName, trainingArray in (("classifications.txt", npaClassifications),
                                ("flattened_images.txt", npaFlattenedImages)):
    np.savetxt(fileName, trainingArray)

# remove windows from memory
cv2.destroyAllWindows()

### TEST MODEL

Open classifications and flattened_images

In [None]:
# Load the training data written by the training section.
#
# Only the errors np.loadtxt is expected to raise for a missing/unreadable file
# (OSError) or unparsable content (ValueError) are caught; the previous bare
# `except:` also swallowed KeyboardInterrupt and hid the real cause. The error
# is re-raised rather than calling exit(), which would shut down the notebook
# kernel.
try:
    # read in training classifications
    npaClassifications = np.loadtxt("classifications.txt", np.float32)
except (OSError, ValueError) as err:
    raise RuntimeError("error, unable to load classifications.txt: " + str(err))

try:
    # read in training images
    npaFlattenedImages = np.loadtxt("flattened_images.txt", np.float32)
except (OSError, ValueError) as err:
    raise RuntimeError("error, unable to load flattened_images.txt: " + str(err))

Reshape the classifications array into a single column (N x 1), necessary to pass to the call to train

In [None]:
# one row per training sample, a single column holding its label
npaClassifications = np.reshape(npaClassifications, (npaClassifications.size, 1))

Create KNN object

In [None]:
# instantiate the KNN object, then train it on the loaded samples and labels
kNearest = cv2.ml.KNearest_create()
kNearest.train(
    npaFlattenedImages,         # training samples
    cv2.ml.ROW_SAMPLE,          # each row of the samples array is one sample
    npaClassifications,         # label column matching the samples row by row
)

Open testing image

In [None]:
# Read the test image. cv2.imread does not raise on failure; it returns None.
imgTestingNumbers = cv2.imread(TEST_IMAGE)

# Raise instead of calling exit(): exit() is meant for the interactive shell
# and, inside a notebook, shuts the kernel down and discards all state.
if imgTestingNumbers is None:
    raise OSError("error: image not read from file: " + TEST_IMAGE)

Edit image

In [None]:
# Pre-process the test image exactly like the training sheets:
# grayscale -> blur (noise reduction) -> inverted adaptive threshold.
imgGray = cv2.cvtColor(imgTestingNumbers, cv2.COLOR_BGR2GRAY)
imgBlurred = cv2.GaussianBlur(imgGray, (5, 5), 0)       # 5x5 Gaussian kernel

# Convert the blurred grayscale image to pure black and white.
imgThresh = cv2.adaptiveThreshold(
    imgBlurred,                         # input image
    255,                                # pixels that pass the threshold become full white
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,     # gaussian rather than mean, seems to give better results
    cv2.THRESH_BINARY_INV,              # inverted: white foreground on black background
    11,                                 # size of the pixel neighborhood used for the threshold
    2,                                  # constant subtracted from the mean or weighted mean
)

Get separate contours from testing image

In [None]:
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in OpenCV 3.x. Slicing the last two elements
# makes the unpacking work on every version.
npaContours, npaHierarchy = cv2.findContours(
    imgThresh.copy(),               # pass a copy: older OpenCV versions modify the input image
    cv2.RETR_EXTERNAL,              # retrieve the outermost contours only
    cv2.CHAIN_APPROX_SIMPLE,        # compress straight segments down to their end points
)[-2:]

Create empty array

In [None]:
# will hold every valid contour found in the testing image
allContoursWithData = list()

Create objects for each contour

In [None]:
# Start from an empty list every time this cell runs. Appending to a list
# created in another cell meant that re-running this cell duplicated every
# contour, so each character would be recognised more than once.
allContoursWithData = []

for npaContour in npaContours:
    # wrap the raw contour together with its bounding rect and area
    contourWithData = ContourWithData()
    contourWithData.npaContour = npaContour
    contourWithData.boundingRect = cv2.boundingRect(npaContour)
    contourWithData.calculateRectTopLeftPointAndWidthAndHeight()
    contourWithData.fltArea = cv2.contourArea(npaContour)

    # keep only contours that pass the validity check
    if contourWithData.checkIfContourIsValid():
        allContoursWithData.append(contourWithData)

Sort contours from left to right

In [None]:
# order the contours by the x of their bounding rect's left edge (left to right),
# replacing the list contents in place
allContoursWithData[:] = sorted(allContoursWithData, key=lambda contour: contour.intRectX)

Declare final string, this will hold the recognized character sequence by the end of the program

In [None]:
# starts empty; recognized characters are added during the recognition loop
strFinalString = str()

In [None]:
# Recognise every valid contour from left to right.
#
# Characters are collected in a list and joined once at the end, so re-running
# this cell rebuilds the string instead of extending the previous result.
recognizedChars = []

for contourWithData in allContoursWithData:
    # draw a green rectangle (thickness 2) around the character on the original testing image
    topLeft = (contourWithData.intRectX, contourWithData.intRectY)
    bottomRight = (contourWithData.intRectX + contourWithData.intRectWidth,
                   contourWithData.intRectY + contourWithData.intRectHeight)
    cv2.rectangle(imgTestingNumbers, topLeft, bottomRight, (0, 255, 0), 2)

    # crop char out of threshold image
    imgROI = imgThresh[contourWithData.intRectY: contourWithData.intRectY + contourWithData.intRectHeight,
                       contourWithData.intRectX: contourWithData.intRectX + contourWithData.intRectWidth]

    # resize to the same size the training samples were stored in
    imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))

    # flatten into a single row and convert to float32 before querying the model
    npaROIResized = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
    npaROIResized = np.float32(npaROIResized)

    # ask the KNN model for the single nearest training sample
    retval, npaResults, neigh_resp, dists = kNearest.findNearest(npaROIResized, k=1)

    # the result is the character code stored as a float; turn it back into a character
    strCurrentChar = chr(int(npaResults[0][0]))

    cv2.imshow("imgTestingNumbers", imgTestingNumbers)
    print(strCurrentChar)

    # Wait for a key press; ESC (27) aborts. `break` is used instead of exit()
    # so the windows are still closed below and the notebook kernel survives.
    # The mask keeps only the low byte, which is where the key code lives on
    # OpenCV builds that return extra modifier bits.
    if cv2.waitKey(0) & 0xFF == 27:
        break

    # keep the character only if the user did not abort on it
    recognizedChars.append(strCurrentChar)

strFinalString = "".join(recognizedChars)

print("\n" + strFinalString + "\n")             # show final string
cv2.destroyAllWindows()                         # remove windows from memory
cv2.waitKey(1)