## Use the handwritten digits dataset by Manning to create a k-nearest-neighbors classifier

In [17]:
import numpy as np
from os import listdir
import operator

### Prepare dataset

In [18]:
def img2vector(filename):
    """
        Reads a 32 x 32 text image file and unrolls it into a vector of 1 x 1024.

        The first 32 characters of each of the first 32 lines are converted
        with int() and stored row after row, so the character at line i,
        column j ends up at index 32*i + j.

        Parameters:
            filename: path of the text file to read.

        Returns:
            numpy array of shape (1, 1024) holding the converted values.

        Raises:
            IndexError: if one of the first 32 lines has fewer than 32 characters.
            ValueError: if a character cannot be converted by int().
    """
    returnVect = np.zeros((1, 1024))
    # The context manager closes the file even if parsing fails part-way;
    # this function is called once per image, so a leaked handle per call adds up.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32*i+j] = int(lineStr[j])
    return returnVect

### Classify function

In [19]:
def classify0(inX, dataSet, labels, k=3):
    """
        k-nearest-neighbours vote.

        Computes the Euclidean distance from inX to every row of dataSet,
        takes the k closest rows and returns the label that occurs most
        often among them. When several labels share the top count, the one
        that was tallied first (i.e. belongs to the nearer neighbour) wins.

        Parameters:
            inX: the sample to classify (one value per column of dataSet).
            dataSet: 2-D numpy array with one known sample per row.
            labels: sequence where labels[i] is the label of dataSet row i.
            k: how many nearest rows take part in the vote.

        Returns:
            The winning label, taken from labels.
    """
    numRows = dataSet.shape[0]
    # Repeat the query once per known sample so the subtraction is row-by-row.
    offsets = np.tile(inX, (numRows, 1)) - dataSet
    euclidean = (offsets ** 2).sum(axis=1) ** 0.5
    nearestFirst = euclidean.argsort()

    votes = {}
    for rank in range(k):
        neighbourLabel = labels[nearestFirst[rank]]
        if neighbourLabel in votes:
            votes[neighbourLabel] += 1
        else:
            votes[neighbourLabel] = 1

    # sorted() is stable, so equal counts keep their tally order.
    ranked = sorted(votes.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[0][0]

### Test

In [20]:
def handwritingClassTest(trainingDir='trainingDigits', testDir='testDigits', k=3):
    """
        Evaluates the k-nearest-neighbours classifier on the digit image files.

        Every file in trainingDir is loaded with img2vector into one row of
        the training matrix. Every file in testDir is then classified with
        classify0 and compared with its true label. One line is printed per
        test file, followed by the total number of errors and the error rate.

        The label of a file is the integer found before the first '_' in its
        name (with everything from the first '.' onwards dropped).

        Parameters:
            trainingDir: directory holding the training image files.
            testDir: directory holding the test image files.
            k: number of neighbours passed on to classify0.

        Returns:
            None; the results are printed.

        Raises:
            ValueError: if a file name does not start with an integer label.
    """
    def labelFromFileName(fileNameStr):
        # Shared by the training and test loops so both parse names identically.
        return int(fileNameStr.split('.')[0].split('_')[0])

    trainingFileList = listdir(trainingDir)
    m = len(trainingFileList)
    hwLabels = []
    trainingMat = np.zeros((m, 1024))

    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(labelFromFileName(fileNameStr))
        trainingMat[i, :] = img2vector('%s/%s' % (trainingDir, fileNameStr))

    testFileList = listdir(testDir)
    mTest = len(testFileList)
    errorCount = 0
    for fileNameStr in testFileList:
        classNumStr = labelFromFileName(fileNameStr)
        vectorUnderTest = img2vector('%s/%s' % (testDir, fileNameStr))
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, k)
        print("The classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))

        if classifierResult != classNumStr:
            errorCount += 1

    print("The total number of errors is: %d" % errorCount)
    # An empty test directory would otherwise divide by zero; report a rate of 0.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print("The total error rate is: %f" % errorRate)

In [21]:
# Run the evaluation: prints one line per test file (predicted vs. real digit),
# then the total number of errors and the error rate.
handwritingClassTest()

The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answer is: 0
The classifier came back with: 0, the real answe

The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answer is: 1
The classifier came back with: 1, the real answe

The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answer is: 3
The classifier came back with: 3, the real answe

The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answer is: 4
The classifier came back with: 4, the real answe

The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answer is: 6
The classifier came back with: 6, the real answe

The classifier came back with: 7, the real answer is: 7
The classifier came back with: 7, the real answer is: 7
The classifier came back with: 7, the real answer is: 7
The classifier came back with: 7, the real answer is: 7
The classifier came back with: 7, the real answer is: 7
The classifier came back with: 7, the real answer is: 7
The classifier came back with: 7, the real answer is: 7
The classifier came back with: 7, the real answer is: 7
The classifier came back with: 8, the real answer is: 8
The classifier came back with: 8, the real answer is: 8
The classifier came back with: 8, the real answer is: 8
The classifier came back with: 6, the real answer is: 8
The classifier came back with: 8, the real answer is: 8
The classifier came back with: 8, the real answer is: 8
The classifier came back with: 8, the real answer is: 8
The classifier came back with: 8, the real answer is: 8
The classifier came back with: 8, the real answer is: 8
The classifier came back with: 8, the real answe

The classifier came back with: 9, the real answer is: 9
The classifier came back with: 7, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answer is: 9
The classifier came back with: 9, the real answe