In [5]:
import os
import cv2
import time
import glob
import numpy as np
from sklearn import svm 
from sklearn.externals import joblib
from skimage import io,transform  

def disturb(data,lable):
    """Shuffle samples and their labels with one shared random permutation.

    Parameters
    ----------
    data : np.ndarray
        Samples, indexed along axis 0.
    lable : np.ndarray
        Labels, indexed along axis 0 in the same order as ``data``.
        (The misspelled parameter name is kept so existing callers still work.)

    Returns
    -------
    (img, labels) : tuple of np.ndarray
        ``data`` and ``lable`` reordered by the same permutation, so every
        sample keeps its own label.
    """
    num_example = data.shape[0]
    arr = np.arange(num_example)
    # In-place shuffle driven by NumPy's global RNG; seed it for reproducibility.
    np.random.shuffle(arr)
    img = data[arr]
    # The original indexed an undefined name `label` here instead of the
    # `lable` parameter, which raises NameError (or silently uses a stale
    # notebook global).
    labels = lable[arr]
    return img,labels

# Split all data into training, validation and test sets.
def allocate(data,label,inter1=0.6,inter2=0.8):
    """Split samples and labels into train / validation / test partitions.

    The split is positional (no shuffling happens here): the first
    ``inter1`` fraction is the training set, the slice between ``inter1``
    and ``inter2`` is the validation set, and the remainder is the test set.

    Parameters
    ----------
    data : np.ndarray
        Samples, indexed along axis 0.
    label : np.ndarray
        Labels aligned with ``data``.
    inter1 : float, optional
        Fraction of samples at which the training set ends (default 0.6).
    inter2 : float, optional
        Fraction of samples at which the validation set ends (default 0.8).

    Returns
    -------
    tuple
        ``(x_train, y_train, x_val, y_val, x_test, y_test)`` - six arrays.
    """
    num_example = data.shape[0]
    # Built-in int() replaces np.int, which was deprecated in NumPy 1.20 and
    # removed in 1.24; both truncate toward zero.
    s1 = int(num_example*inter1)
    s2 = int(num_example*inter2)
    x_train = data[:s1]
    y_train = label[:s1]
    x_val = data[s1:s2]
    y_val = label[s1:s2]
    x_test = data[s2:]
    y_test = label[s2:]
    return x_train,y_train,x_val,y_val,x_test,y_test

# Compute local feature descriptors for one image.
def calcSiftFeature(img):
    """Return the local feature descriptors of a BGR image.

    Despite the function name, the extractor created here is SURF
    (``cv2.xfeatures2d.SURF_create()``) with default settings; no cap on the
    number of keypoints is configured.

    Parameters
    ----------
    img : np.ndarray
        Colour image in BGR channel order, as passed to
        ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns
    -------
    np.ndarray
        One descriptor per row. When no keypoints are detected an empty
        float32 array of shape ``(0, descriptor_size)`` is returned instead
        of ``None``, so callers can concatenate the result unconditionally.
    """
    # Convert the image to grayscale before detection.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): SURF lives in the opencv-contrib "nonfree" module and may
    # be unavailable in stock OpenCV builds - confirm the runtime environment.
    sift = cv2.xfeatures2d.SURF_create()
    # Detect keypoints and compute their descriptors in one pass.
    keypoints, features = sift.detectAndCompute(gray, None)
    if features is None:
        # detectAndCompute yields None descriptors when nothing was detected.
        features = np.empty((0, sift.descriptorSize()), dtype=np.float32)
    return features

# Learn the visual vocabulary (dictionary).
def learnVocabulary(features, wordCnt=50):
    """Cluster descriptors with k-means and return the cluster centres.

    Parameters
    ----------
    features : np.ndarray
        Descriptors, one per row.
    wordCnt : int, optional
        Number of clusters, i.e. vocabulary size (default 50, the value that
        was previously hard-coded).

    Returns
    -------
    np.ndarray
        The ``centers`` array returned by ``cv2.kmeans``.
    """
    # Stop when either the accuracy (eps = 0.1) is reached or 20 iterations
    # have been run, whichever happens first.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 0.1)
    # Pick the initial cluster centres at random.
    flags = cv2.KMEANS_RANDOM_CENTERS
    # cv2.kmeans only accepts float32 samples; coerce defensively so float64
    # input does not fail inside OpenCV.
    samples = np.asarray(features, dtype=np.float32)
    # Arguments: samples, K, initial labels (None), stop criteria,
    # number of attempts (20), centre-initialisation flags.
    compactness, labels, centers = cv2.kmeans(samples, wordCnt, None, criteria, 20, flags)
    return centers

# Compute the bag-of-words feature vector of one image.
def calcFeatVec(features, centers):
    """Build a visual-word histogram for one image.

    Each descriptor is assigned to its nearest centre (Euclidean distance)
    and the matching histogram bin is incremented.

    Parameters
    ----------
    features : np.ndarray or None
        Descriptors of one image, one per row. ``None`` or an empty array
        yields an all-zero histogram.
    centers : np.ndarray
        Vocabulary centres, one per row, with the same number of columns as
        ``features``.

    Returns
    -------
    np.ndarray
        Float array of shape ``(1, number_of_centres)`` holding the counts.
        The width now follows ``centers`` instead of a hard-coded 50.
    """
    centers = np.asarray(centers)
    wordCnt = centers.shape[0]
    featVec = np.zeros((1, wordCnt))
    if features is None or len(features) == 0:
        return featVec
    features = np.asarray(features)
    # Squared distance from every descriptor to every centre; the square root
    # is skipped because it does not change which centre is nearest.
    diffMat = features[:, np.newaxis, :] - centers[np.newaxis, :, :]
    sqSum = (diffMat**2).sum(axis=2)
    # Index of the nearest centre for each descriptor.
    nearest = sqSum.argmin(axis=1)
    # Count how many descriptors landed on each centre.
    featVec[0] = np.bincount(nearest, minlength=wordCnt)
    return featVec

# Build the visual vocabulary from every image under the dataset root.
def build_center(path, save_path="e:/flowers/svm_centers.npy"):
    """Extract descriptors from all images and learn the vocabulary centres.

    Every immediate sub-directory of ``path`` is scanned for ``*.jpg`` files.
    Descriptors from all readable images are pooled and clustered with
    ``learnVocabulary``; the resulting centres are saved with ``np.save``.

    Parameters
    ----------
    path : str
        Dataset root directory containing one sub-directory per class.
    save_path : str, optional
        Where the centres are written (defaults to the previously hard-coded
        location).

    Returns
    -------
    np.ndarray
        The cluster centres returned by ``learnVocabulary``.

    Raises
    ------
    ValueError
        If no descriptors could be extracted from any image under ``path``.
    """
    print(path)
    # os.listdir(path) lists files and folders; keep only the sub-directories.
    cate=[path+'/'+x for x in os.listdir(path) if os.path.isdir(path+'/'+x)]
    # Collect per-image descriptor arrays and join them once at the end;
    # calling np.append per image re-copies the whole pool every time.
    collected = []
    for folder in cate:
        for im in glob.glob(folder+'/*.jpg'):
            img=cv2.imread(im)
            if img is None:
                # cv2.imread returns None for unreadable/corrupt files.
                print('skipping unreadable image:', im)
                continue
            img_f = calcSiftFeature(img)
            if img_f is None or len(img_f) == 0:
                # No keypoints were detected in this image.
                continue
            collected.append(img_f)
    if not collected:
        raise ValueError("no feature descriptors found under %r" % (path,))
    # float32 matches the dtype the original accumulator started with.
    features = np.concatenate(collected, axis=0).astype(np.float32, copy=False)
    # Learn the bag-of-words vocabulary from the pooled descriptors.
    centers = learnVocabulary(features)
    np.save(save_path, centers)
    print('词袋:',centers.shape)
    return  centers

def SVM_Train(imgs,labels,model_path="e:/flowers/svm_model.m"):
    """Fit an SVM classifier and persist it to disk.

    Parameters
    ----------
    imgs : array-like
        Training feature vectors, passed to ``clf.fit`` as X.
    labels : array-like
        Training labels, passed to ``clf.fit`` as y.
    model_path : str, optional
        Destination of the serialized model (defaults to the previously
        hard-coded location, which ``SVM_Test`` reads by default).

    Returns
    -------
    sklearn.svm.SVC
        The fitted classifier (the original returned nothing; returning it
        lets callers skip the reload from disk).
    """
    # SVC with default hyper-parameters and a one-vs-one decision function.
    clf = svm.SVC(decision_function_shape='ovo')
    # Fit the classifier on the training data.
    clf.fit(imgs,labels)
    # NOTE(review): `joblib` is imported from sklearn.externals in this file;
    # that shim was removed in scikit-learn 0.23 - confirm the installed
    # version or switch the import to the standalone `joblib` package.
    joblib.dump(clf, model_path)
    return clf

def SVM_Test(x_test,y_test,model_path="e:/flowers/svm_model.m"):
    """Load the persisted SVM and measure its accuracy on a test set.

    Parameters
    ----------
    x_test : np.ndarray
        Test feature vectors; ``x_test.shape[0]`` is the number of samples.
    y_test : array-like
        Ground-truth labels aligned with ``x_test``.
    model_path : str, optional
        Location of the serialized model (defaults to the previously
        hard-coded location, which ``SVM_Train`` writes by default).

    Returns
    -------
    (accuracy, res) : tuple
        ``accuracy`` is the fraction of correct predictions as a float
        (0.0 for an empty test set), ``res`` is the predictions array.
    """
    # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
    # only load model files from a trusted location.
    # NOTE(review): `joblib` is imported from sklearn.externals in this file;
    # that shim was removed in scikit-learn 0.23 - confirm the installed version.
    clf = joblib.load(model_path)
    res = clf.predict(x_test)
    num_test = x_test.shape[0]
    if num_test == 0:
        # Avoid dividing by zero when there is nothing to score.
        return 0.0,res
    # Count matching predictions in one vectorised comparison.
    acc = int(np.sum(np.ravel(y_test) == np.ravel(res)))
    return acc/num_test,res
    

if __name__ == "__main__":
    path = 'e:/flower'
    # Learn the visual vocabulary from the dataset. The original line was the
    # incomplete statement `centers = `, which is a syntax error.
    centers = build_center(path)
    # NOTE(review): build_data is not defined in this cell - confirm it exists
    # elsewhere in the notebook and how it obtains the vocabulary centres.
    imgs,labels = build_data(path)
    # Shuffle samples and labels together.
    imgs,labels = disturb(imgs,labels)
    # allocate() returns six arrays (train / validation / test); unpacking
    # into four names raised "too many values to unpack".
    x_train,y_train,x_val,y_val,x_test,y_test = allocate(imgs,labels)
    print(x_train.shape)
    print(y_train)
    SVM_Train(x_train,y_train)
    acc,res = SVM_Test(x_test,y_test)
    


e:/flower
['e:/flower/daisy', 'e:/flower/dandelion', 'e:/flower/roses', 'e:/flower/sunflowers', 'e:/flower/tulips']
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 64)
(64,)
(50, 64)
(50, 

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 64 and the array at index 1 has size 50

In [3]:
 # Show the test accuracy; `acc` is assigned by the SVM_Test call in the training cell, so that cell must run first.
 print(acc)

0.2506811989100817
