In [6]:
import time
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import cv2 as cv
from sklearn.svm import SVC

# Load the letter-recognition data set. Column 0 holds the letter label and
# every other column is used as a numeric feature.
# For reference, the OpenCV tutorial reads the same file with NumPy instead:
# data = np.loadtxt('letter-recognition.data', dtype='float32', delimiter=',',
#                   converters={0: lambda ch: ord(ch) - ord('A')})
# X = data[:, 1:]
# y = data[:, 0]
data = pd.read_csv('letter-recognition.data', header=None)
# Features are cast to float32 because the OpenCV models below are trained on
# the same arrays.
X = data.drop(0, axis=1).values.astype(np.float32)
# Encode each label as its offset from 'A'. The `np.int` alias was removed in
# NumPy 1.24, so an explicit 32-bit integer dtype is used instead.
y = np.array([ord(letter) - ord('A') for letter in data[0]]).astype(np.int32)
# Half of the rows are held out for testing; the split is random on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50)

#########################################
# Benchmark 1: scikit-learn k-nearest-neighbours with k=5.
# The timed span covers both fitting and scoring on the test split.
start1 = time.time()
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_acc = 100 * knn.score(X_test, y_test.ravel())  # fraction -> percent
end1 = time.time()
print(f"sklearn KNN 识别手写字母集的准确率是{round(knn_acc,2)}%, 测试用时: {round(end1 - start1,2)}秒")

#########################################
# Benchmark 2: OpenCV k-nearest-neighbours with k=5.
# The timed span covers both training and prediction on the test split.
start2 = time.time()
knn = cv.ml.KNearest_create()
knn.train(X_train, cv.ml.ROW_SAMPLE, y_train)
ret, result, neighbours, dist = knn.findNearest(X_test, k=5)
# `result` is flattened so it lines up element-wise with the 1-D label array.
correct = np.count_nonzero(result.ravel() == y_test)
# Divide by the actual number of test samples rather than a hard-coded 10000,
# so the percentage stays right if the data set or split ratio changes.
opencv_knn_acc = correct * 100 / len(y_test)
end2 = time.time()
print(f"OpenCV KNN 识别手写字母集的准确率是{round(opencv_knn_acc,2)}%, 测试用时: {round(end2 - start2,2)}秒")

#########################################
# Benchmark 3: scikit-learn support-vector classifier with C=2.67.
# The timed span covers both fitting and scoring on the test split.
start3 = time.time()
svc = SVC(C=2.67).fit(X_train, y_train)
svc_acc = 100 * svc.score(X_test, y_test.ravel())  # fraction -> percent
end3 = time.time()
print(f"sklearn SVC 识别手写字母集的准确率是{round(svc_acc,2)}%, 测试用时: {round(end3 - start3,2)}秒")

#########################################
# Benchmark 4: OpenCV support-vector machine (C-SVC, C=2.67).
# The timed span covers both training and prediction on the test split.
start4 = time.time()
svm = cv.ml.SVM_create()
# svm.setKernel(cv.ml.SVM_LINEAR)  # the linear kernel only reached about 60% accuracy
# Kernel reference: https://docs.opencv.org/3.4/d1/d2d/classcv_1_1ml_1_1SVM.html
# NOTE: SVM_CHI2 is OpenCV's exponential chi-squared kernel, not the Gaussian
# RBF kernel (that one is cv.ml.SVM_RBF). It is kept because it is the setting
# the reported results were obtained with, but this model is therefore not
# kernel-for-kernel identical to an RBF SVM.
svm.setKernel(cv.ml.SVM_CHI2)
svm.setType(cv.ml.SVM_C_SVC)
svm.setC(2.67)
svm.train(X_train, cv.ml.ROW_SAMPLE, y_train)
y_predict = svm.predict(X_test)
# print(y_predict)
# Element 1 of the predict() result holds the per-sample predictions; it is
# flattened so it lines up element-wise with the 1-D label array.
correct = np.count_nonzero(y_predict[1].ravel() == y_test)
# Divide by the actual number of test samples rather than a hard-coded 10000,
# so the percentage stays right if the data set or split ratio changes.
opencv_svm_acc = correct * 100 / len(y_test)
end4 = time.time()
print(f"OpenCV SVC 识别手写字母集的准确率是{round(opencv_svm_acc,2)}%, 测试用时: {round(end4 - start4,2)}秒")

sklearn KNN 识别手写字母集的准确率是92.99%, 测试用时: 1.67秒
OpenCV KNN 识别手写字母集的准确率是93.0%, 测试用时: 0.1秒
sklearn SVC 识别手写字母集的准确率是93.41%, 测试用时: 5.08秒
OpenCV SVC 识别手写字母集的准确率是96.65%, 测试用时: 3.89秒
