In [1]:
# -*- coding: utf-8 -*-
# @Time    : 2017/7/13 下午7:17
# @Author  : play4fun
# @File    : 1-kNN.py
# @Software: PyCharm

"""
1-kNN.py:

k 的取值最好为奇数
根据 k 个 最近邻居进行分类的方法 称为 kNN

权重
距离近的具有更高的权重， 距离远的权重更低

"""

import cv2
import numpy as np
import matplotlib.pyplot as plt

# Feature set containing (x, y) values of 25 known/training samples.
trainData = np.random.randint(0, 100, (25, 2)).astype(np.float32)
# Label each sample as Red (0) or Blue (1).
responses = np.random.randint(0, 2, (25, 1)).astype(np.float32)

# Red family: red triangles.
red = trainData[responses.ravel() == 0]
plt.scatter(red[:, 0], red[:, 1], s=80, c='r', marker='^')
# Blue family: blue squares.
blue = trainData[responses.ravel() == 1]
plt.scatter(blue[:, 0], blue[:, 1], s=80, c='b', marker='s')

# The test sample is drawn in green. It is added BEFORE plt.show() so that it
# appears on the same axes as the training data; showing the figure first
# would leave the green point alone on a new, empty figure.
newcomer = np.random.randint(0, 100, (1, 2)).astype(np.float32)
plt.scatter(newcomer[:, 0], newcomer[:, 1], s=80, c='g', marker='o')

knn = cv2.ml.KNearest_create()
knn.train(trainData, cv2.ml.ROW_SAMPLE, responses)

# findNearest returns, besides `ret`:
#   1. results    - the class label (0 or 1) kNN assigns to each test sample.
#                   For plain nearest-neighbour classification pass k=1;
#                   k is the number of neighbours consulted.
#   2. neighbours - the class labels of the k nearest neighbours.
#   3. dist       - the distance from the test sample to each of those neighbours.
ret, results, neighbours, dist = knn.findNearest(newcomer, 3)

print("result: ", results, "\n")
print("neighbours: ", neighbours, "\n")
print("distance: ", dist)
plt.show()

# With many test samples, pass them all at once as one array;
# the outputs are then arrays with one row per sample.

# 10 new comers
newcomers = np.random.randint(0, 100, (10, 2)).astype(np.float32)
# Query with the 10-row array (the single `newcomer` was passed here by mistake).
ret, results, neighbours, dist = knn.findNearest(newcomers, 3)
# The results also will contain 10 labels.


<Figure size 640x480 with 1 Axes>

result:  [[1.]] 

neighbours:  [[1. 1. 1.]] 

distance:  [[128. 200. 404.]]


<Figure size 640x480 with 1 Axes>

In [None]:
# -*- coding: utf-8 -*-
# @Time    : 2017/7/13 下午7:32
# @Author  : play4fun
# @File    : 2-使用kNN对手写数字OCR.py
# @Software: PyCharm

"""
2-使用kNN对手写数字OCR.py:
"""

# 准备数据

import numpy as np
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('img/digits.png')
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError('img/digits.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Split the image into 5000 cells, each 20x20 pixels.
cells = [np.hsplit(row, 100) for row in np.vsplit(gray, 50)]

# Turn it into a NumPy array of shape (50, 100, 20, 20).
x = np.array(cells)
# Left half of every row is training data, right half is test data.
train = x[:, :50].reshape(-1, 400).astype(np.float32)  # Size = (2500,400)
test = x[:, 50:100].reshape(-1, 400).astype(np.float32)  # Size = (2500,400)

# Labels for train and test data: 250 consecutive samples per digit 0..9.
k = np.arange(10)
train_labels = np.repeat(k, 250)[:, np.newaxis]
test_labels = train_labels.copy()

# Initiate kNN, train it, then classify the test data using the 5 nearest neighbours.
knn = cv2.ml.KNearest_create()
knn.train(train, cv2.ml.ROW_SAMPLE, train_labels)
ret, result, neighbours, dist = knn.findNearest(test, k=5)

# Accuracy check: compare the predictions with test_labels and count the matches.
matches = result == test_labels
correct = np.count_nonzero(matches)
accuracy = correct * 100.0 / result.size
print('准确率', accuracy)  # accuracy: 91.76%


# Save the data. The archive name must be the one loaded right below (and by
# the later cells); it used to be written as 'knn_data.npz', so the load failed.
np.savez('knn_data_num.npz', train=train, train_labels=train_labels, test=test, test_labels=test_labels)

# Now load the data back
with np.load('knn_data_num.npz') as data:
    print(data.files)
    train = data['train']
    train_labels = data['train_labels']
    test = data['test']
    test_labels = data['test_labels']


# TODO: how do we predict a digit?
retval, results = knn.predict(test[1003:1005])
# Docstring: predict(samples[, results[, flags]]) -> retval, results
print(retval, results)  # (4.0, array([[ 4.],[ 4.]], dtype=float32))
# Write one sample out as an image for visual comparison.
# NOTE(review): index 1005 lies just outside the predicted slice 1003:1005
# (which covers 1003 and 1004) - confirm which sample was meant.
cv2.imwrite('test[1005].jpg', test[1005].reshape((20, 20)))

In [None]:
# -*- coding: utf-8 -*-
# @Time    : 2017/7/13 下午7:35
# @Author  : play4fun
# @File    : 2-英文字母的OCR.py
# @Software: PyCharm

"""
2-英文字母的OCR.py:
"""

import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the data; the converter maps the letter in column 0 to a number (A -> 0).
data = np.loadtxt('img/letter-recognition.data', dtype='float32', delimiter=',',
                  converters={0: lambda ch: ord(ch) - ord('A')})  # 20000 rows
# Split the data in two halves: one for training, one for testing.
train, test = np.vsplit(data, 2)
# Split each half into the label column (column 0) and the feature columns.
responses, trainData = np.hsplit(train, [1])
labels, testData = np.hsplit(test, [1])

# Initiate the kNN, classify, measure accuracy.
knn = cv2.ml.KNearest_create()
knn.train(trainData, cv2.ml.ROW_SAMPLE, responses)
ret, result, neighbours, dist = knn.findNearest(testData, k=5)

correct = np.count_nonzero(result == labels)
# Divide by the actual number of predictions rather than a hard-coded 10000.
accuracy = correct * 100.0 / result.size
print('准确率', accuracy)  # 93.06
# Accuracy reaches about 93%. It can be raised further by adding training samples.


# Save the data. Store the feature matrix (trainData), not `train`: `train`
# still carries the label in column 0, which would leak the answer into the
# features and give train/test archives of different widths.
np.savez('knn_data_alphabet.npz', train_alphabet=trainData, train_labels_alphabet=responses,
         test_alphabet=testData, test_labels_alphabet=labels)

# Predicting a letter works the same way as predicting a digit.


In [None]:
# -*- coding: utf-8 -*-
# @Time    : 2017/8/10 17:59
# @Author  : play4fun
# @File    : 同时预测数字和英文字母1.py
# @Software: PyCharm

"""
同时预测数字和英文字母1.py:
"""

import numpy as np
import cv2
from matplotlib import pyplot as plt

with np.load('knn_data_num.npz') as data:
    print(data.files)  # ['train', 'train_labels', 'test', 'test_labels']
    train = data['train']
    train_labels = data['train_labels']
    test = data['test']
    test_labels = data['test_labels']

with np.load('knn_data_alphabet.npz') as data:
    print(data.files)
    train_alphabet = data['train_alphabet']
    train_labels_alphabet = data['train_labels_alphabet']
    test_alphabet = data['test_alphabet']
    test_labels_alphabet = data['test_labels_alphabet']

# Archives written by the earlier version of the letter cell stored the label
# in column 0 of train_alphabet (one column more than test_alphabet).
# Drop that column so training and test features have the same width.
if train_alphabet.shape[1] == test_alphabet.shape[1] + 1:
    train_alphabet = train_alphabet[:, 1:]

# The digit samples (400 features each) and the letter samples have different
# feature widths, so they cannot be stacked into one training matrix. Training
# one model on both in turn does not work either: after the second train() call
# the model expected the letter width and findNearest(test) failed with
#   (-215) test_samples.type() == CV_32F && test_samples.cols == samples.cols
# Hence one model per data set.
print('加载KNN,数据')
knn = cv2.ml.KNearest_create()
knn.train(train, cv2.ml.ROW_SAMPLE, train_labels)

knn_alphabet = cv2.ml.KNearest_create()
knn_alphabet.train(train_alphabet, cv2.ml.ROW_SAMPLE, train_labels_alphabet)

# Digits are classified by the digit model ...
ret, result, neighbours, dist = knn.findNearest(test, k=5)
# ... and letters by the letter model.
ret_alphabet, result_alphabet, neighbours_alphabet, dist_alphabet = knn_alphabet.findNearest(
    test_alphabet, k=5)

In [None]:
# -*- coding: utf-8 -*-
# @Time    : 2017/8/8 11:57
# @Author  : play4fun
# @File    : 预测手写数字1.py
# @Software: PyCharm

"""
预测手写数字1.py:

验证码
https://login.bthhotels.com/
"""

import numpy as np
import cv2
from matplotlib import pyplot as plt

def _load_digit_sample(path):
    """Load one image as a kNN input row.

    The image is read as grayscale, resized to 20x20 pixels and flattened
    into a float32 array of shape (1, 400), the layout of the training rows.

    Raises:
        FileNotFoundError: if cv2.imread cannot read `path` (it returns None).
    """
    image = cv2.imread(path, 0)
    if image is None:
        raise FileNotFoundError(path)
    return cv2.resize(image, (20, 20)).reshape((-1, 400)).astype(np.float32)


with np.load('knn_data_num.npz') as data:
    print(data.files)  # ['train', 'train_labels', 'test', 'test_labels']
    train = data['train']
    train_labels = data['train_labels']
    test = data['test']
    test_labels = data['test_labels']

print('加载KNN,数据')
knn = cv2.ml.KNearest_create()
knn.train(train, cv2.ml.ROW_SAMPLE, train_labels)

# Load the photos
print('加载相片')
gray21 = _load_digit_sample('2.png')
gray61 = _load_digit_sample('6.png')

# Stack the two rows into one (2, 400) batch.
g3 = np.append(gray21, gray61, axis=0)

# Predict
retval, results = knn.predict(g3)
print(retval, results)  # not accurate
# (0.0, array([[ 0.],
#         [ 5.]], dtype=float32))
# NOTE(review): presumably the photos differ from the training cells
# (polarity, centering, stroke width) - confirm before trusting the output.


In [None]:
# -*- coding: utf-8 -*-
# @Time    : 2017/8/8 12:33
# @Author  : play4fun
# @File    : knn-find_nearest.py
# @Software: PyCharm

"""
knn-find_nearest.py:
http://www.bogotobogo.com/python/OpenCV_Python/python_opencv3_Machine_Learning_Classification_K-nearest_neighbors_k-NN.php
"""

import cv2
import numpy as np
import matplotlib.pyplot as plt

# Feature set containing (x, y) values of 25 known/training samples.
trainData = np.random.randint(0, 100, (25, 2)).astype(np.float32)

# Label each sample as Red (0) or Blue (1).
responses = np.random.randint(0, 2, (25, 1)).astype(np.float32)

# Plot Reds as red triangles.
red = trainData[responses.ravel() == 0]
plt.scatter(red[:, 0], red[:, 1], s=80, c='r', marker='^')

# Plot Blues as blue squares.
blue = trainData[responses.ravel() == 1]
plt.scatter(blue[:, 0], blue[:, 1], s=80, c='b', marker='s')

# kNN model. The OpenCV 2 constructor cv2.KNearest() is replaced by the
# cv2.ml factory.
knn = cv2.ml.KNearest_create()
# Train the model. The cv2.ml API needs the sample layout as second argument;
# the old two-argument call train(trainData, responses) raised
# "TypeError: only length-1 arrays can be converted to Python scalars".
knn.train(trainData, cv2.ml.ROW_SAMPLE, responses)

# New sample : (x, y), drawn as a green circle.
newcomer = np.random.randint(0, 100, (1, 2)).astype(np.float32)
plt.scatter(newcomer[:, 0], newcomer[:, 1], s=80, c='g', marker='o')

# Find the 3 nearest neighbours and predict the response for the input vector.
# The cv2.ml method is findNearest; find_nearest was the OpenCV 2 name.
ret, results, neighbours, dist = knn.findNearest(newcomer, 3)

print("result: ", results, "\n")
print("neighbours: ", neighbours, "\n")
print("distance: ", dist)

plt.show()
