In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy
import operator
import os
import matplotlib.pyplot as plt


In [None]:
# Convert a 32x32 text-encoded digit image into a (1, 1024) row vector.
def img2vector(filename):
    """Read a 32x32 grid of digit characters from ``filename`` into a vector.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and stored row-major, so pixel (i, j) lands at index ``32 * i + j``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A numpy array of shape (1, 1024) holding the pixel values.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If one of the first 32 lines is shorter than the columns
            being read.
        ValueError: If a character inside the grid is not a digit (this is
            also what a short line ending in a newline produces).
    """
    returnVect = numpy.zeros((1, 1024))
    # The context manager closes the handle even when parsing fails; the
    # previous version never closed the files it opened.
    with open(filename) as imageFile:
        for i in range(32):
            lineStr = imageFile.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect

In [None]:
# kNN classifier
def classifier(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training samples.

    Nearness is the Euclidean distance between ``inX`` and each row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify; it must broadcast against the rows
            of ``dataSet`` (e.g. shape (1, n) or (n,)).
        dataSet: 2-D numpy array with one training sample per row.
        labels: Sequence giving the label of each row of ``dataSet``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= dataSet.shape[0]``.

    Returns:
        The label with the most votes. When several labels share the top
        count, the one whose first vote came from the nearest neighbour wins.

    Raises:
        ValueError: If ``k`` is outside ``1 .. dataSet.shape[0]``.
    """
    # shape[0] is the number of rows, i.e. the number of training samples.
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of training samples (%d), got %r"
            % (dataSetSize, k)
        )
    # Broadcasting subtracts inX from every row without materialising a
    # tiled copy of it; the sign is irrelevant because the result is squared.
    diffMat = dataSet - inX
    distances = numpy.sqrt((diffMat ** 2).sum(axis=1))
    # Indices of the training samples ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for index in sortedDistIndicies[:k]:
        voteIlabel = labels[index]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # max() returns the first entry with the highest count, which matches the
    # first element of a stable descending sort on the vote counts.
    return max(classCount.items(), key=operator.itemgetter(1))[0]

In [None]:
# Evaluate the handwritten-digit kNN classifier on the test set.
def _digitLabelFromName(fileName):
    """Return the integer before the first '_' of ``fileName``, or None if it is not an integer."""
    stem = fileName.split('.')[0]
    try:
        return int(stem.split('_')[0])
    except ValueError:
        return None


def handWritingClassTest(k, dataDir='knn-digits'):
    """Classify every test digit with kNN and report the misclassifications.

    Training samples are read from ``<dataDir>/trainingDigits`` and test
    samples from ``<dataDir>/testDigits``. The true label of a sample is the
    integer before the first underscore of its file name. Files whose name
    does not start with such an integer are reported and skipped, so they
    neither receive a stale label nor count towards the error rate.

    Args:
        k: Number of neighbours passed to ``classifier``.
        dataDir: Directory containing the ``trainingDigits`` and
            ``testDigits`` sub-directories.

    Returns:
        The number of misclassified test samples, as a float.

    Raises:
        ValueError: If no usable training file is found.
    """
    trainingDir = os.path.join(dataDir, 'trainingDigits')
    testDir = os.path.join(dataDir, 'testDigits')

    # Training: collect only the files that carry a parseable label so that
    # labels and matrix rows always stay aligned.
    hwLabels = []
    trainingVectors = []
    # os.listdir order is arbitrary; sorting makes runs reproducible.
    for fileNameStr in sorted(os.listdir(trainingDir)):
        classNum = _digitLabelFromName(fileNameStr)
        if classNum is None:
            print('Skipping training file without a digit label:', fileNameStr)
            continue
        hwLabels.append(classNum)
        trainingVectors.append(img2vector(os.path.join(trainingDir, fileNameStr)))
    if not trainingVectors:
        raise ValueError('No labelled training files found in %s' % trainingDir)
    trainingMat = numpy.vstack(trainingVectors)

    # Testing: classify each labelled file and compare with its true label.
    errorCount = 0.0
    testedCount = 0
    for fileNameStr in sorted(os.listdir(testDir)):
        classNum = _digitLabelFromName(fileNameStr)
        if classNum is None:
            print('Skipping test file without a digit label:', fileNameStr)
            continue
        vectorTest = img2vector(os.path.join(testDir, fileNameStr))
        result = classifier(vectorTest, trainingMat, hwLabels, k)
        testedCount += 1
        print("The classification result is: %d, the true result is: %d" % (result, classNum))
        if result != classNum:
            errorCount += 1.0
    print("Total Errors:%d" % errorCount)
    # Divide by the samples actually classified; skipped files must not
    # dilute the rate, and an empty test set must not divide by zero.
    errorRate = errorCount / testedCount if testedCount else 0.0
    print("Error rate:%f" % errorRate)
    return errorCount

In [2]:
# Compare the recognition quality obtained with different values of k.
def selectK(kValues=range(1, 5)):
    """Run the digit test once per k and plot the error count against k.

    Args:
        kValues: Iterable of neighbour counts to evaluate. The default,
            1 through 4, is the range this function previously hard-coded.

    Returns:
        None. The line chart is displayed with ``plt.show()``.
    """
    ks = [int(k) for k in kValues]
    errorCounts = [int(handWritingClassTest(k)) for k in ks]

    plt.plot(ks, errorCounts, marker='o')
    # Label the figure so it can be read on its own when skimming the notebook.
    plt.xlabel('k (number of neighbours)')
    plt.ylabel('Misclassified test digits')
    plt.title('kNN digit recognition: errors vs. k')
    plt.xticks(ks)

    plt.show()

In [1]:
# Start the test; a line chart of error count versus k will be generated.
# The cell that defines selectK must be run first, otherwise this call
# raises NameError.
selectK()

NameError: name 'selectK' is not defined

# 测试证明，k选3效果比较好，直接用 Testing shows that k = 3 performs well, so it is used directly below.

In [None]:
# Single evaluation run of the digit test with k = 3.
handWritingClassTest(3)