To use cv2, you need to install opencv. Here is how to do it: https://pypi.org/project/opencv-python/

In [None]:
import cv2
thres = 0.45  # Minimum confidence a detection needs in order to be reported

# Change the number in the brackets after VideoCapture according to your
# connected cameras. For me 0 is the built-in camera of my laptop and 1 or 2
# is an external camera (depends on the USB port I connect it to).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail early with a clear message instead of failing later, when the
    # detector is handed a frame that was never captured.
    raise RuntimeError(
        'Could not open camera 0 - check the index passed to cv2.VideoCapture'
    )

# Named property ids instead of the bare numbers 3, 4 and 10.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
cap.set(cv2.CAP_PROP_BRIGHTNESS, 70)

# Read the class labels (one label per line) that the detector's class ids
# are later mapped to.
classFile = 'coco.names'
classNames = []
with open(classFile, mode='rt') as names_file:
    contents = names_file.read()
    # Strip trailing newlines first so the list does not end with an empty
    # label, then split into one entry per line.
    classNames = contents.rstrip('\n').split('\n')

# Model files: the frozen, trained weights (.pb) and the matching network
# description (.pbtxt).
weightsPath = 'frozen_inference_graph.pb'
configPath = 'ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt'

# Initiate the deep neural network (= dnn).
# DetectionModel is OpenCV's high-level API for object detection networks: it
# creates the net from the weights and config files, applies the input
# preprocessing configured below, runs the forward pass and returns the
# resulting detections.
net = cv2.dnn_DetectionModel(weightsPath, configPath)

# --- Preprocessing applied to every frame before it enters the network ---

# Every frame is resized to this (width, height) before inference.
net.setInputSize(320, 320)

# OpenCV reads images in BGR order, while object detection models generally
# expect RGB, so swap the R and B channels.
net.setInputSwapRB(True)

# Normalise the pixel values: 127.5 (= 255 / 2) is subtracted from each colour
# channel and the result is multiplied by 1 / 127.5. Pixel values between
# 0 and 255 therefore end up in the range -1 to 1, centred on zero.
net.setInputMean((127.5, 127.5, 127.5))
net.setInputScale(1.0 / 127.5)
# Main loop: grab a frame, run the detector on it, draw every detection and
# show the annotated frame.
# Press 'q' while the output window has focus to stop the script. The camera
# and the window are released on the way out (also when the kernel is
# interrupted), so shutting down the kernel is no longer required.
try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame was delivered (camera unplugged, wrong index, ...).
            # Stop here rather than passing an empty image to the detector.
            print('Could not read a frame from the camera - stopping.')
            break

        classIds, confs, bbox = net.detect(img, confThreshold=thres)
        print(classIds, bbox)

        # Use len() rather than truthiness: the results are arrays when
        # something was detected, and truth-testing an array with several
        # elements raises an error.
        if len(classIds) != 0:
            for classId, confidence, box in zip(classIds.flatten(), confs.flatten(), bbox):
                # Class ids are treated as 1-based, hence the "- 1". Fall back
                # to the raw id when it has no entry in classNames instead of
                # raising IndexError (or silently wrapping around for id 0).
                if 0 < classId <= len(classNames):
                    label = classNames[classId - 1].upper()
                else:
                    label = str(classId)

                cv2.rectangle(img, box, color=(0, 255, 0), thickness=2)
                # Label near the top-left corner of the box ...
                cv2.putText(img, label, (box[0] + 10, box[1] + 30),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
                # ... and the confidence in percent to the right of it.
                cv2.putText(img, str(round(confidence * 100, 2)), (box[0] + 200, box[1] + 30),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('Output', img)
        # waitKey also gives the window time to redraw; mask to the low byte
        # so the comparison with ord('q') works across platforms.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Free the camera for other programs and close the preview window.
    cap.release()
    cv2.destroyAllWindows()

[[ 1]
 [77]
 [77]
 [77]] [[ 104    0 1112  720]
 [ 792   47  152  202]
 [ 866   33  105  198]
 [ 709   39  183  213]]
[[ 1]
 [77]
 [77]] [[ 102    0 1114  720]
 [ 796   44  150  203]
 [ 862   34  110  195]]
[[ 1]
 [77]
 [77]] [[ 107    0 1116  720]
 [ 794   42  152  204]
 [ 862   33  111  195]]
[[ 1]
 [77]
 [77]
 [77]] [[ 110    0 1114  720]
 [ 792   43  153  206]
 [ 863   34  111  193]
 [ 715   40  181  221]]
[[ 1]
 [77]
 [77]] [[ 111    0 1101  720]
 [ 791   42  157  203]
 [ 863   34  110  192]]
[[ 1]
 [77]
 [77]] [[ 103    0 1105  720]
 [ 863   34  109  195]
 [ 793   38  154  208]]
[[ 1]
 [77]
 [77]] [[  92    0 1122  720]
 [ 791   42  155  204]
 [ 864   34  108  192]]
[[ 1]
 [77]
 [77]
 [77]] [[  92    0 1117  720]
 [ 794   43  153  202]
 [ 862   32  111  198]
 [ 714   43  185  212]]
[[ 1]
 [77]
 [77]
 [77]] [[  78    0 1135  720]
 [ 790   46  157  201]
 [ 862   34  112  197]
 [ 714   46  185  209]]
[[ 1]
 [77]
 [77]] [[  78    0 1119  720]
 [ 797   46  151  200]
 [ 863   35  111  