In [32]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [33]:
def look_img(img):
    """Show an OpenCV image inline with matplotlib.

    The image is converted from BGR to RGB channel order before being handed
    to ``plt.imshow``; the input array itself is not modified.
    """
    rgb_view = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_view)
    plt.show()

# Weight and network-definition files for the detector, loaded through OpenCV's DNN module.
weights = "yolov3.weights"
config_file = "yolov3.cfg"
net = cv2.dnn.readNet(weights, config_file)
layersNames = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# version they arrive either as an (N, 1) array or as a flat (N,) array, so
# flatten first instead of indexing every entry with [0].
output_layers_names = [
    layersNames[layer_id - 1]
    for layer_id in np.array(net.getUnconnectedOutLayers()).flatten()
]

# One class label per line; a label's position in the list is its class index.
with open('coco.names', 'r') as f:
    classes = f.read().splitlines()

In [34]:
# Overlap threshold handed to cv2.dnn.NMSBoxes as its NMS threshold.
NMS_THRES = 0.4
# Minimum score (objectness * class probability) handed to cv2.dnn.NMSBoxes.
CONF_THRES = 0.2

In [75]:
def process_frame(img):
    """Run the detector on one frame and draw the surviving boxes onto it.

    The frame is annotated in place and the same array is returned. If no
    candidate reaches ``CONF_THRES`` the frame is returned untouched.

    Args:
        img: image array with shape (height, width, channels), as produced by
            ``cv2.imread`` / ``VideoCapture.read``.

    Returns:
        ``img`` itself, with a rectangle and a ``"<class name> <score>"`` label
        drawn for every box kept by non-maximum suppression.
    """
    height, width, _ = img.shape
    blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    net.setInput(blob)
    # Forward pass: one 2-D array per requested output layer.
    prediction = net.forward(output_layers_names)

    # Stack all output scales so decoding runs as array operations instead of a
    # Python loop over every candidate row.
    # Row layout used below: [0:4] box centre x/y and width/height, [4] objectness,
    # [5:] per-class scores.
    detections = np.vstack(prediction)
    # Promote to float64 so the product matches a float * float32 scalar multiply.
    objectness = detections[:, 4].astype(np.float64)
    class_scores = detections[:, 5:]
    best_class = np.argmax(class_scores, axis=1)
    best_prob = class_scores[np.arange(len(best_class)), best_class]
    scores = objectness * best_prob

    # Rows below the confidence threshold would be discarded by NMSBoxes anyway;
    # dropping them here avoids building boxes and labels for them.
    keep = scores >= CONF_THRES
    if not np.any(keep):
        return img
    detections = detections[keep]

    # Box values are multiplied by the frame size (presumably they are fractions
    # of the network input), then truncated to whole pixels.
    center_x = (detections[:, 0] * width).astype(int)
    center_y = (detections[:, 1] * height).astype(int)
    box_w = (detections[:, 2] * width).astype(int)
    box_h = (detections[:, 3] * height).astype(int)
    # Top-left corner of each box.
    left = (center_x - box_w / 2).astype(int)
    top = (center_y - box_h / 2).astype(int)

    boxes = np.column_stack((left, top, box_w, box_h)).tolist()
    confidences = scores[keep].tolist()
    class_names = [classes[class_id] for class_id in best_class[keep]]

    # Score thresholding plus non-maximum suppression.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRES, NMS_THRES)
    # The result may be an (N, 1) array, a flat array, or an empty tuple when
    # nothing survives; normalise all of them to a flat integer array.
    indexes = np.array(indexes, dtype=int).reshape(-1)

    # One random colour per kept box, as plain Python floats.
    colors = np.random.uniform(0, 255, size=(len(indexes), 3)).tolist()
    for color, i in zip(colors, indexes):
        x, y, w, h = boxes[i]
        confidence = str(round(confidences[i], 2))
        cv2.rectangle(img, (x, y), (w + x, h + y), color, 4)
        # Label: class name followed by the score.
        string = '{} {}'.format(class_names[i], confidence)
        # Arguments: image, text, origin, font, font scale, colour, thickness.
        cv2.putText(img, string, (x, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1)
    return img

In [76]:
import cv2

def video_demo():
    """Show the camera feed with detections drawn until Esc is pressed.

    Each frame is mirrored horizontally, passed through ``process_frame`` and
    displayed in a window named "video". The loop also stops, after printing
    'ERROR', when a frame cannot be read. The capture device is released on
    every exit path, including exceptions and kernel interrupts.
    """
    capture = cv2.VideoCapture(0)  # camera index 0
    try:
        while True:
            # success is False when no frame could be grabbed.
            success, frame = capture.read()
            if not success:
                print('ERROR')
                break
            # Mirror the frame left-to-right so the preview behaves like a mirror.
            frame = cv2.flip(frame, 1)
            frame = process_frame(frame)
            cv2.imshow("video", frame)
            # Keep only the low byte of the key code before comparing with Esc (27).
            if (cv2.waitKey(50) & 0xFF) == 27:
                break
    finally:
        capture.release()

try:
    video_demo()
finally:
    # Close the preview window even if the loop raised or the kernel was interrupted.
    cv2.destroyAllWindows()

## 发现帧数很低，思考问题所在：系统自带摄像头不行/处理代码速度较慢

In [79]:
# 运行系统摄像头，检验好坏
import cv2

def video_demo():
    """Show the raw, mirrored camera feed until Esc is pressed.

    This definition replaces the earlier ``video_demo`` and performs no
    detection, so the camera's own frame rate can be judged in isolation.
    The loop also stops, after printing 'ERROR', when a frame cannot be read.
    The capture device is released on every exit path, including exceptions
    and kernel interrupts.
    """
    capture = cv2.VideoCapture(0)  # camera index 0
    try:
        while True:
            # success is False when no frame could be grabbed.
            success, frame = capture.read()
            if not success:
                print('ERROR')
                break
            # Mirror the frame left-to-right so the preview behaves like a mirror.
            frame = cv2.flip(frame, 1)
            cv2.imshow("video", frame)
            # Keep only the low byte of the key code before comparing with Esc (27).
            if (cv2.waitKey(50) & 0xFF) == 27:
                break
    finally:
        capture.release()

try:
    video_demo()
finally:
    # Close the preview window even if the loop raised or the kernel was interrupted.
    cv2.destroyAllWindows()

In [90]:
# 测试代码各模块耗时
import time
def process_frame1(img):
    """Run the detection pipeline on one image and print stage timings.

    The decoding step deliberately uses a plain per-row Python loop so that
    its cost shows up in the measurement. Boxes are drawn onto ``img`` in
    place; nothing is returned.

    Five numbers are printed, one per line. Each is the elapsed time in
    seconds from the start of the call to the END of a stage (cumulative, not
    per-stage): preprocessing, forward pass, row decoding, NMS, drawing.
    Subtract consecutive values to get the cost of a single stage.

    Args:
        img: image array with shape (height, width, channels).
    """
    # perf_counter is a monotonic, high-resolution clock intended for intervals.
    start = time.perf_counter()
    height, width, _ = img.shape
    blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    end0 = time.perf_counter()

    net.setInput(blob)
    # Forward pass: one 2-D array per requested output layer.
    prediction = net.forward(output_layers_names)
    end1 = time.perf_counter()

    # Box coordinates as [x, y, w, h].
    boxes = []
    # Objectness score of each row.
    objectess = []
    # Probability of the best class of each row.
    class_probs = []
    # Name of the best class of each row.
    class_names = []
    # Walk over every output scale and every candidate row in it.
    for scale in prediction:
        for bbox in scale:
            obj = bbox[4]
            class_scores = bbox[5:]

            class_id = np.argmax(class_scores)
            class_name = classes[class_id]
            class_prob = class_scores[class_id]

            # Box centre and size, scaled by the frame size and truncated to pixels.
            center_x = int(bbox[0]*width)
            center_y = int(bbox[1]*height)
            w = int(bbox[2]*width)
            h = int(bbox[3]*height)
            # Top-left corner of the box.
            x = int(center_x - w/2)
            y = int(center_y - h/2)

            boxes.append([x, y, w, h])
            objectess.append(float(obj))
            class_names.append(class_name)
            class_probs.append(class_prob)
    end2 = time.perf_counter()

    # Final score of each row: objectness * best class probability.
    confidences = np.array(class_probs)*np.array(objectess)
    # Score thresholding plus non-maximum suppression.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRES, NMS_THRES)
    # The result may be an (N, 1) array, a flat array, or an empty tuple when
    # nothing survives; normalise all of them to a flat integer array.
    indexes = np.array(indexes, dtype=int).reshape(-1)
    end3 = time.perf_counter()

    # A random colour for every candidate box.
    colors = np.random.uniform(0, 255, size=(len(boxes), 3))
    # Draw every box that survived NMS.
    for i in indexes:
        x, y, w, h = boxes[i]
        confidence = str(round(confidences[i], 2))
        # Plain Python floats for the colour argument.
        color = colors[i % len(colors)].tolist()
        cv2.rectangle(img, (x, y), (w+x, h+y), color, 4)
        # Label: class name followed by the score.
        string = '{} {}'.format(class_names[i], confidence)
        # Arguments: image, text, origin, font, font scale, colour, thickness.
        cv2.putText(img, string, (x, y+20), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1)
    end4 = time.perf_counter()

    # All values are cumulative seconds since `start`.
    print(end0-start) # up to the end of image preprocessing
    print(end1-start) # up to the end of the forward pass
    print(end2-start) # up to the end of the per-row decoding loop
    print(end3-start) # up to the end of thresholding + NMS
    print(end4-start) # up to the end of drawing

In [91]:
test_image_path = 'images/test4.jpg'
img = cv2.imread(test_image_path)
# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than deep inside the pipeline.
if img is None:
    raise FileNotFoundError(test_image_path)
process_frame1(img)

0.0020351409912109375
0.432842493057251
0.6452746391296387
0.6472692489624023
0.6482670307159424


    # 改进部分
    for scale in prediction:
        for bbox in scale:
            obj = bbox[4]
            class_scores = bbox[5:]

            temp = np.max(class_scores)
            temp1 = obj * temp
            if(temp1<CONF_THRES):
                continue

            class_id = np.argmax(class_scores)
            class_name = classes[class_id]