In [5]:
import numpy as np
import operator
import os 

# 使用kNN实现手写数字的识别

In [30]:
# 将数据转化为向量
def img2vec(file_path):
    result = np.zeros((1,1024))
    with open(file_path) as f:
#         调用一次readLines()之后，文件指针就会移动到末尾。这时调用read
#         将无法读取到文字
#         res = f.readlines()
        for i in range(32):
            line = f.readline()
            for j in range(32):
                # 因为读取进来的是一个字符串，所以只能一个一个的赋值
                result[0,32*i+j] = int(line[j])
    return result


In [31]:
img2vec('./digits/trainingDigits/0_0.txt')

array([[0., 0., 0., ..., 0., 0., 0.]])

In [32]:
# 定义kNN算法
def kNN(test_x, data_set, labels, k):
     # test_x是一条待测试的数据，data_set是训练集，lebels是训练样本标签，k是取的最近邻个数，k必须大于0
    num_data_set = data_set.shape[0]
    diff_mat = np.tile(test_x, (num_data_set,1))-data_set
    # 计算测试数据和训练集中各个数据之间的欧式距离
    distances = (diff_mat**2).sum(axis=1)**0.5
    # 根据记录进行排序，并返回排序后的索引位置
    indicies_sorted_distance = distances.argsort()
    # 存储k个最邻近的label的各自的出现次数
    label_counter = {}
    for i in range(k):
        # 获得所属的label
        voted_label = labels[indicies_sorted_distance[i]]
        # 根据label在相应的类别上，加1
        label_counter[voted_label] = label_counter.get(voted_label,0) +1
    # operator.itemgetter 获取对象的哪个维度的数据
    # operator.itemgetter（0）根据字典的键排序
    # operator.itemgetter（1）根据字典的值排序
    # 最后返回，k近邻中岁数类别最多的那一类,下标从1开始
    return sorted(label_counter.items(),
                  key=operator.itemgetter(1),reverse=True)[0][0]

In [33]:
# 定义识别手写数字函数

def hand_writing_recognition(k=3):
    train_files = os.listdir('./digits/trainingDigits')
    num_train = len(train_files)
    train_data_set = np.zeros((num_train,1024))
    train_labels = []
    for i in range(num_train):
        file_name = train_files[i]
        # 去掉文件后缀名
        # 从文件名提取出类型
        label = int((file_name.split('.')[0]).split('_')[0])
        train_labels.append(label)
        train_data_set[i,:] = img2vec("./digits/trainingDigits/{}".format(file_name))
    # 测试集
    test_files =  os.listdir('./digits/testDigits')
    error_counter = 0.0
    num_test = len(test_files)
    for i in range(num_test):
        file_name = test_files[i]
        # 去掉文件后缀名
        # 从文件名提取出类型
        label = int((file_name.split('.')[0]).split('_')[0])
        test_data = img2vec('./digits/testDigits/{}'.format(file_name))
        predict_result = kNN(test_data, train_data_set,train_labels,k)
        print("the classifier came back with: %d, the real answer is: %d" 
              % (predict_result, label))
        if predict_result != label:
            error_counter += 1.0
    return error_counter, error_counter/float(num_test)


In [35]:
error_counter, error_rate = hand_writing_recognition(4)

print("\nthe total number of errors is: %d" % error_counter)
print("\nthe total error rate is: %f" % error_rate)

the classifier came back with: 9, the real answer is: 9
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 4, the real answe

the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 4, the real answe

the classifier came back with: 4, the real answer is: 4
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 1, the real answe

the classifier came back with: 2, the real answer is: 2
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 7, the real answer is: 1
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 5, the real answe

the classifier came back with: 5, the real answer is: 5
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 4, the real answe

the classifier came back with: 7, the real answer is: 7
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 9, the real answe

the classifier came back with: 6, the real answer is: 6
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 2, the real answer is: 2

the total number of errors is: 11

the total error rate is: 0.011628
