In [1]:
import darknet
import cv2

# Live webcam detection through the `darknet` Python binding.
#
# NOTE(review): the recorded output of this cell is
# "AttributeError: module 'darknet' has no attribute 'load_net'", i.e. the
# installed `darknet` module does not expose the API used below. Confirm which
# darknet binding is installed before relying on this cell.
net = darknet.load_net("yolov3.cfg", "yolov3.weights", 0)
meta = darknet.load_meta("coco.names")

cap = cv2.VideoCapture(0)
# Darknet image buffer: created on the first frame and reused afterwards, so a
# new buffer is not allocated for every captured frame.
dark_frame = None

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the OpenCV image into a Darknet-format image.
        if dark_frame is None:
            dark_frame = darknet.make_image(frame.shape[1], frame.shape[0], 3)
        darknet.copy_image_from_bytes(dark_frame, frame.tobytes())

        # Run object detection.
        results = darknet.detect_image(net, meta, dark_frame)

        # Draw the detections; result[2] is unpacked as a centre-based box
        # (x, y, w, h) and result[0] as the encoded class label.
        for result in results:
            x, y, w, h = result[2]
            top_left = (int(x - w/2), int(y - h/2))
            bottom_right = (int(x + w/2), int(y + h/2))
            cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0), 2)
            cv2.putText(frame, str(result[0].decode("utf-8")), (top_left[0], top_left[1] - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        # Show the annotated frame.
        cv2.imshow('frame', frame)

        # Press "q" to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised or
    # the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()


AttributeError: module 'darknet' has no attribute 'load_net'

In [2]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the network configuration file and the weight file.
net=cv2.dnn.readNet(model='yolov3.weights',
                    config='yolov3.cfg')
# All layer names of the network.
layerName=net.getLayerNames()
# Names of the unconnected output layers (three scales for YOLOv3), used later
# for the forward pass. getUnconnectedOutLayersNames() is used instead of
# indexing layerName with getUnconnectedOutLayers(), because the shape of that
# method's return value differs between OpenCV releases.
ThreeOutput_layers_name=list(net.getUnconnectedOutLayersNames())

# The model detects the classes listed in coco.names, one name per line.
with open('coco.names','r',encoding='utf-8') as fp:
    classes=fp.read().splitlines()

# Confidence threshold used to filter detections.
Confidence_thresh=0.2
# Non-maximum-suppression threshold used to prune overlapping candidate boxes.
Nms_thresh=0.35

# Detection and drawing of the results.
def Forward_Predict(frame):
    """Run the module-level ``net`` on ``frame`` and draw the detections on it.

    Parameters
    ----------
    frame : numpy.ndarray
        Image whose first two dimensions are (height, width). It is modified
        in place.

    Returns
    -------
    numpy.ndarray
        The same ``frame`` object with a rectangle and a
        "<class name> <confidence>" label drawn for every box kept by
        non-maximum suppression.

    Notes
    -----
    Reads the module-level ``net``, ``ThreeOutput_layers_name``, ``classes``,
    ``Confidence_thresh`` and ``Nms_thresh``.
    """
    # Arguments: image, scale factor, network input size, mean to subtract,
    # swap R and B channels, crop flag.
    blob = cv2.dnn.blobFromImage(frame, 1 / 255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
    height, width = frame.shape[:2]
    net.setInput(blob)
    # Forward pass through the output layers; one 2-D array per output scale.
    predict = net.forward(ThreeOutput_layers_name)

    # Stack all scales into one array; each row is indexed below as
    # [cx, cy, w, h, objectness, class scores...].
    detections = np.vstack(predict)
    class_scores = detections[:, 5:]
    class_id = np.argmax(class_scores, axis=1)
    # Score of the best class. The index is relative to the class-score slice,
    # so it must be looked up in class_scores, not in the full row.
    class_prob = class_scores[np.arange(len(detections)), class_id]
    scores = class_prob * detections[:, 4]

    # Pre-filter so only plausible candidates are converted to Python objects.
    # NMSBoxes applies the same score threshold itself, so this does not change
    # which boxes are drawn.
    candidates = np.flatnonzero(scores >= Confidence_thresh)
    if candidates.size == 0:
        return frame

    boxes = []
    confidences = []
    class_names = []
    for row in candidates:
        box = detections[row]
        # Box centre and size, mapped back to the input image.
        center_x = int(box[0] * width)
        center_y = int(box[1] * height)
        w = int(box[2] * width)
        h = int(box[3] * height)
        # Top-left corner of the box.
        left_x = int(center_x - w / 2)
        left_y = int(center_y - h / 2)
        boxes.append([left_x, left_y, w, h])
        confidences.append(float(scores[row]))
        class_names.append(classes[class_id[row]])

    # Non-maximum suppression. The result may be an empty tuple or an index
    # array depending on the outcome, so normalise it to a flat int array.
    all_index = cv2.dnn.NMSBoxes(boxes, confidences, Confidence_thresh, Nms_thresh)

    for i in np.array(all_index, dtype=int).reshape(-1):
        i = int(i)
        x, y, w, h = boxes[i]
        # Round the confidence to two decimals for the label.
        confidence = str(round(confidences[i], 2))
        cv2.rectangle(img=frame, pt1=(x, y), pt2=(x + w, y + h),
                      color=(0, 255, 0), thickness=2)
        text = class_names[i] + ' ' + confidence
        cv2.putText(img=frame, text=text, org=(x, y - 10),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.0, color=(0, 0, 255), thickness=2)
    return frame

# Real-time detection.
def detect_time():
    """Run detection on frames from camera 0 until ESC is pressed.

    Each frame is mirrored, resized to 416x416, passed to ``Forward_Predict``
    and shown in a window named 'detect'. The loop ends when the camera is not
    opened, a frame cannot be read, or ESC (key code 27) is pressed. The camera
    is released and the windows are closed on every exit path, including
    exceptions.
    """
    cap=cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            OK,frame=cap.read()
            if not OK:
                break
            # Mirror the frame horizontally (a positive flipCode flips around
            # the vertical axis).
            frame=cv2.flip(src=frame,flipCode=2)
            frame=cv2.resize(src=frame,dsize=(416,416))
            dst=Forward_Predict(frame)

            cv2.imshow('detect',dst)
            # Mask to the low byte so the comparison does not depend on extra
            # high bits some platforms set in the key code.
            key=cv2.waitKey(1) & 0xFF
            if key==27:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


# Detection on a single picture.
def signa_Picture(image_path='images/smile.jpg'):
    """Run detection on one image file and show the result until a key is pressed.

    Parameters
    ----------
    image_path : str
        Path of the image to load.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot load ``image_path`` (it returns ``None``).
    SystemExit
        If ESC (key code 27) is pressed, as the original ``exit()`` call did.
    """
    img=cv2.imread(image_path)
    if img is None:
        # imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: {}'.format(image_path))
    img=cv2.resize(src=img,dsize=(416,416))
    dst=Forward_Predict(img)
    cv2.imshow('detect',dst)
    try:
        key=cv2.waitKey(0)
    finally:
        # Close the window however the wait ended.
        cv2.destroyAllWindows()
    if key==27:
        # Raise SystemExit directly instead of calling the interactive
        # `exit()` helper, which is not meant for program code.
        raise SystemExit

# Entry point of this cell. No capture object is created at this level
# (detect_time() opens and releases its own), so nothing is released here;
# windows are closed only after detection has finished.
if __name__ == '__main__':
    print('Pycharm')
    # signa_Picture()
    try:
        detect_time()
    finally:
        cv2.destroyAllWindows()


Pycharm


In [3]:
import cv2
import numpy as np

# Load YOLOv3 or YOLOv4.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
# All layer names of the network.
layer_names = net.getLayerNames()
# Names of the output layers. getUnconnectedOutLayersNames() is used instead of
# indexing layer_names with getUnconnectedOutLayers(), because the shape of
# that method's return value differs between OpenCV releases.
output_layers = list(net.getUnconnectedOutLayersNames())
# Load the class names, one per line.
with open("coco.names", "r", encoding="utf-8") as f:
    classes = [line.strip() for line in f]

# One colour per class; the generator is seeded so that colours are the same
# on every run.
colors = np.random.default_rng(0).uniform(0, 255, size=(len(classes), 3))


In [4]:
# Live webcam detection with the network loaded in the previous cell
# (uses `net`, `output_layers`, `classes` and `colors`).
SCORE_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4    # overlap threshold for non-maximum suppression

cap = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_SIMPLEX
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Stop instead of spinning forever when no frame can be read.
            break

        # Frame size, needed to map relative coordinates back to pixels.
        height, width = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(
            frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

        # Run the network.
        net.setInput(blob)
        outs = net.forward(output_layers)

        # Collect the detections above the score threshold.
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > SCORE_THRESHOLD:
                    # Coordinates are relative to the image width and height
                    # and must be converted to pixel coordinates.
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression: drop overlapping boxes, ranked by score.
        # The result may be an empty tuple or an index array, so normalise it
        # to a sorted list of ints (same drawing order as iterating all boxes).
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD)
        for i in sorted(int(k) for k in np.array(indexes).reshape(-1)):
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            # Plain tuple of floats rather than a numpy row for the colour.
            color = tuple(float(c) for c in colors[class_ids[i]])
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y - 5), font, 1, color, 2)

        # Show the result.
        cv2.imshow("Image", frame)
        # Press "q" to quit; mask to the low byte so the comparison does not
        # depend on extra high bits in the key code.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even when the cell is
    # interrupted.
    cap.release()
    cv2.destroyAllWindows()

KeyboardInterrupt: 