In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import operator
import os

In [2]:
# 将图像数据转换为（1，1024）向量 Convert the image data to a (1, 1024) vector
def img2vector(filename): 
    returnVect = np.zeros((1, 1024))
    file = open(filename)
    for i in range(32):
        lineStr = file.readline()
        for j in range(32):
            returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect

In [3]:
# KNN分类器 KNN Classifier
def classifier(inX, dataSet, labels, k):
    
    #numpy中shape[0]返回数组的行数，shape[1]返回列数 In numpy, shape[0] returns the number of rows in the array, and shape[1] returns the number of columns
    #MDS降维操作 MDS Dimensionality Reduction Operation
    dataSetSize = dataSet.shape[0]
    #去逆矩阵 De-invert matrix
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    #二维特征相减后乘方 Subtract two-dimensional features and then square them
    sqDiffMat = diffMat ** 2
    #计算距离 Calculating distance
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    print ("distances:",distances)
    #返回distance中元素从小到大排序后的索引 Returns the index of the elements in distance sorted from small to large
    sortedDistIndicies = distances.argsort()
    print ("sortDistance:",sortedDistIndicies)
    classCount = {}
    for i in range(k):
        #取出前k个元素的类别 Take out the categories of the first k elements
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    #reverse降序排序字典 reverse sort dictionary in descending order
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

# 下面单独测试一张图片，看下识别结果是怎么样的 Let's test a picture separately to see what the recognition result is.

In [4]:
#图像数据转换为txt文件 Convert image data to txt file

from PIL import Image
"""
    将图像数据转换为txt文件 Convert image data to txt file
    :param img_path: 图像文件路径 param img_path: image file path
    :type txt_name: 输出txt文件路径 type txt_name: output txt file path
"""
def img2txt(img_path, txt_name):
    
    im = Image.open(img_path).convert('1').resize((32, 32))  # type:Image.Image
    data = np.asarray(im)
    np.savetxt(txt_name, data, fmt='%d', delimiter='')

In [5]:
#转换图片转化成32*32数组 Convert the image into a 32*32 array

img2txt("8.png", "8.txt")

In [6]:
#训练部分 Training

hwLabels = []
trainingFileList = os.listdir('knn-digits/trainingDigits')
m = len(trainingFileList)
trainingMat = np.zeros((m, 1024))
for i in range(m):
    fileNameStr = trainingFileList[i]
    fileStr = fileNameStr.split('.')[0]  
    try:
#         if(fileStr.split('_')[0] == ''):
#             continue
#         else:
        classNumStr = int(fileStr.split('_')[0])
    except Exception as e:
        print('Error:', e)

    hwLabels.append(classNumStr)
    trainingMat[i, :] = img2vector("knn-digits/trainingDigits/%s" % fileNameStr)

In [7]:
#测试数据分类结果 Test data classification results

fileStr = "8.txt"
classNumStr = int(fileStr.split('.')[0])
vectorTest = img2vector("./8.txt")

#print(vectorTest[0][])

result = classifier(vectorTest, trainingMat, hwLabels, 3)  # k=3
print("The classification result is: %d, the true result is: %d" % (result, classNumStr))



distances: [18.94729532 17.66352173 16.1245155  ... 16.79285562 17.23368794
 17.4642492 ]
sortDistance: [ 103  861 1182 ... 1291  658 1687]
The classification result is: 8, the true result is: 8
