In [1]:
import cv2 as cv
import random
import glob
import os
import numpy as np
import sklearn.metrics as sm
import joblib


## 获取训练和测试样本

In [None]:
# 读取负样本
def read_neg_samples(foldername):
    imgs = []
    labels = []
    neg_count = 0
    filenames = glob.iglob(os.path.join(foldername, '*'))
    for filename in filenames:
        src = cv.imread(filename, 1)
        imgs.append(src)
        labels.append(-1)
        neg_count += 1
    return imgs, labels


# 读取正样本
def read_pos_samples(foldername):
    imgs = []
    labels = []
    pos_count = 0
    filenames = glob.iglob(os.path.join(foldername, '*'))
    for filename in filenames:
        src = cv.imread(filename)
        imgs.append(src)
        labels.append(1)
        pos_count += 1
    return imgs, labels

# 读取测试样本
def read_test_samples(foldername):
    imgs = []
    labels = []
    pos_count = 0
    filenames = glob.iglob(os.path.join(foldername, '*'))

    for filename in filenames:
        src = cv.imread(filename)
        imgs.append(src)
        labels.append(1)
        pos_count += 1

    return imgs, labels

## 计算 HOG 特征, 统一所有数据集尺寸64 * 128 

In [None]:
# 计算hog特征 64 * 128
def computeHog(imgs, features, wsize=(128, 64)):
    hog = cv.HOGDescriptor()  
    # 默认参数 winSize（64,128），blockSize（16,16），blockStride（8,8），cellSize（8,8）
    count = 0

    for i in range(len(imgs)):
        if imgs[i].shape[1] >= wsize[1] and imgs[i].shape[0] >= wsize[0]:
            y = imgs[i].shape[0] - wsize[0]
            x = imgs[i].shape[1] - wsize[1]
            h = imgs[i].shape[0]
            w = imgs[i].shape[1]
            roi = imgs[i][y: y + h, x: x + w]
            features.append(hog.compute(roi))
            count += 1

    print('count = ', count)
    return features


## HOG 特征提取 

In [None]:
# 获取所有的hog特征
def get_features(features, labels):
    pos_imgs, pos_labels = read_pos_samples('./new/train/pos')  # 数据集文件路径
    computeHog(pos_imgs, features)

    [labels.append(1) for _ in range(len(pos_imgs))]

    neg_imgs, neg_labels = read_neg_samples('./new/train/neg') # 数据集文件路径
    computeHog(neg_imgs, features)

    [labels.append(-1) for _ in range(len(neg_imgs))]

    return features, labels

def get_featurest(features, labels):
    pos_imgs, pos_labels = read_test_samples('./new/test/pos')# 数据集文件路径
    computeHog(pos_imgs, features)

    [labels.append(1) for _ in range(len(pos_imgs))]

    return features, labels



## SVM 参数配置

In [None]:
def svm_config():
    svm = cv.ml.SVM_create()
    svm.setCoef0(0)
    svm.setCoef0(0.0)
    svm.setDegree(3)
    criteria = (cv.TERM_CRITERIA_MAX_ITER + cv.TERM_CRITERIA_EPS, 1000, 1e-3)
    svm.setTermCriteria(criteria)
    svm.setGamma(0)
    svm.setKernel(cv.ml.SVM_LINEAR)
    svm.setNu(0.5)
    svm.setP(0.1)
    svm.setC(0.01)
    svm.setType(cv.ml.SVM_EPS_SVR)

    return svm
# svm训练
def svm_train(svm, features, labels):
    svm.train(np.array(features), cv.ml.ROW_SAMPLE, np.array(labels))

# svm参数保存
def svm_save(svm, name):
    svm.save(name)

# svm加载参数
def svm_load(name):
    svm = cv.ml.SVM_load(name)
    return svm

## 考虑hard example

In [None]:

# 获取svm参数
def get_svm_detector(svm):
    sv = svm.getSupportVectors()
    rho, _, _ = svm.getDecisionFunction(0)
    sv = np.transpose(sv)
    return np.append(sv, [[-rho]], 0)


# 加载hardexample
def get_hard_samples(svm, hog_features, labels):
    hog = cv.HOGDescriptor()
    hard_examples = []
    hog.setSVMDetector(get_svm_detector(svm))
    negs, hardlabel = read_neg_samples('./new/train/neg')  

    for i in range(len(negs)):
        rects, wei = hog.detectMultiScale(negs[i], 0, winStride=(8, 8), padding=(0, 0), scale=1.05)
        for (x, y, w, h) in rects:
            hardexample = negs[i][y: y + h, x: x + w]
            hard_examples.append(cv.resize(hardexample, (96, 160)))

    computeHog(hard_examples, hog_features)
    [labels.append(-1) for _ in range(len(hard_examples))]
    svm_train(svm, hog_features, labels)
    hog.setSVMDetector(get_svm_detector(svm))
    hog.save('myHogDector1.bin')
    # svm.train(np.array(hog_features),cv.ml.ROW_SAMPLE,np.array(labels))

# hog训练
def hog_train(svm):
    features = []
    labels = []

    hog = cv.HOGDescriptor()

    # get hog features
    get_features(features, labels)

    # svm training
    print('svm training...')
    svm_train(svm, features, labels)
    print('svm training complete...')

    # hog.setSVMDetector(get_svm_detector(svm))
    # hog.save('myHogDector.bin')
    #
    # print('hard samples training...')
    # get_hard_samples(svm, features, labels)
    # print('hard samples complete...')
    return svm


## 测试集 测试标签

### 包含正例1126条

In [2]:
test_f = []
test_l = []
get_features(test_f, test_l)

count =  1126


([array([0.03056815, 0.02831188, 0.03571884, ..., 0.20435983, 0.11329463,
         0.03549691], dtype=float32),
  array([0.06044988, 0.08034912, 0.11868712, ..., 0.13798372, 0.07001861,
         0.07092188], dtype=float32),
  array([0.0489358 , 0.04264932, 0.10589723, ..., 0.10046297, 0.10034597,
         0.04971405], dtype=float32),
  array([0.04480839, 0.01772235, 0.05605903, ..., 0.04014067, 0.02178785,
         0.04045127], dtype=float32),
  array([0.03390313, 0.08074331, 0.08983433, ..., 0.09383947, 0.02269488,
         0.06159115], dtype=float32),
  array([0.0777797 , 0.1052393 , 0.12951638, ..., 0.09686787, 0.08997367,
         0.08493545], dtype=float32),
  array([0.10321008, 0.09555846, 0.09148253, ..., 0.21739396, 0.13922738,
         0.1664655 ], dtype=float32),
  array([0.11452092, 0.15008233, 0.10202973, ..., 0.10876949, 0.05837967,
         0.03657097], dtype=float32),
  array([0.1043318 , 0.16643979, 0.2713932 , ..., 0.02188955, 0.03870054,
         0.04677667], dtype=fl

## 训练集 训练标签

### 包含正例 2416条, 负例 1218条

In [3]:
train_f = []
train_l = []
get_features(train_f, train_l)

count =  2416
count =  1218


([array([0.08146474, 0.1275494 , 0.19812572, ..., 0.04948864, 0.03647127,
         0.02225501], dtype=float32),
  array([0.08304907, 0.06499917, 0.08048643, ..., 0.03144679, 0.0107292 ,
         0.00750533], dtype=float32),
  array([0.2565353 , 0.12218852, 0.03508349, ..., 0.01567453, 0.00050746,
         0.00143523], dtype=float32),
  array([0.15938307, 0.04728963, 0.01300532, ..., 0.        , 0.        ,
         0.        ], dtype=float32),
  array([0.24087198, 0.24087198, 0.08078615, ..., 0.06427958, 0.06561591,
         0.06531481], dtype=float32),
  array([0.24988115, 0.06247523, 0.06271476, ..., 0.06989223, 0.05553125,
         0.05921609], dtype=float32),
  array([0.08061805, 0.09308341, 0.13580021, ..., 0.        , 0.        ,
         0.3529295 ], dtype=float32),
  array([0.17258906, 0.09276723, 0.06979511, ..., 0.        , 0.        ,
         0.35632318], dtype=float32),
  array([0.1591899 , 0.09331665, 0.15811895, ..., 0.        , 0.        ,
         0.3592449 ], dtype=fl

## 配置 sklearn.svm 

In [13]:
import sklearn.svm as svm 
model = svm.SVC(kernel='rbf', C=0.01, degree = 3, coef0 = 0.0) 


## 模型训练

In [14]:
model.fit(train_f, train_l) 
import joblib
joblib.dump(model, "my_model.m")

SVC(C=0.01)

## 测试

In [15]:
pred_l = model.predict(test_f) 

## 输出测试结果

In [16]:
import sklearn.metrics as sm 
bg = sm.classification_report(test_l, pred_l) 
print("分类结果", bg, sep='\n') 


分类结果
              precision    recall  f1-score   support

           1       1.00      1.00      1.00      1126

    accuracy                           1.00      1126
   macro avg       1.00      1.00      1.00      1126
weighted avg       1.00      1.00      1.00      1126



## 模型保存和载入

In [18]:
import joblib
joblib.dump(model, "my_model.m")

['my_model.m']

In [19]:
model1 = joblib.load("my_model.m")

In [20]:
pred_l = model1.predict(test_f) 

import sklearn.metrics as sm 
bg = sm.classification_report(test_l, pred_l) 
print("分类结果", bg, sep='\n') 

分类结果
              precision    recall  f1-score   support

           1       1.00      1.00      1.00      1126

    accuracy                           1.00      1126
   macro avg       1.00      1.00      1.00      1126
weighted avg       1.00      1.00      1.00      1126

