# Resnet 50 on webcam with opencv on GPU

## Import libraries

In [1]:
import torch
import torchvision
import cv2
import cv2.cuda as cv2cuda
import numpy as np
import time

## Check opencv cuda

In [33]:
cv2cuda.printCudaDeviceInfo(0)

*** CUDA Device Query (Runtime API) version (CUDART static linking) *** 

Device count: 1

Device 0: "Quadro T1000"
  CUDA Driver Version / Runtime Version          11.80 / 10.20
  CUDA Capability Major/Minor version number:    7.5
  Total amount of global memory:                 3914 MBytes (4104454144 bytes)
  GPU Clock Speed:                               1.53 GHz
  Max Texture Dimension Size (x,y,z)             1D=(131072), 2D=(131072,65536), 3D=(16384,16384,16384)
  Max Layered Texture Size (dim) x layers        1D=(32768) x 2048, 2D=(32768,32768) x 2048
  Total amount of constant memory:               65536 bytes
  Total amount of shared memory per block:       49152 bytes
  Total number of registers available per block: 65536
  Warp size:                                     32
  Maximum number of threads per block:           1024
  Maximum sizes of each dimension of a block:    1024 x 1024 x 64
  Maximum sizes of each dimension of a grid:     2147483647 x 65535 x 65535
  Maximum

## Download model

In [3]:
model = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT)

## Labels

In [4]:
# dict with ImageNet labels
with open('imagenet_labels.txt') as f:
    labels = eval(f.read())

## Inference

In [9]:
CAPTURE_WIDTH = 640
CAPTURE_HEIGHT = 480

In [37]:
cap.release()
cv2.destroyAllWindows()

# Open webcam and start inference
cap = cv2.VideoCapture(0)
gpu_frame = cv2.cuda_GpuMat()
cap.set(cv2.CAP_PROP_FRAME_WIDTH, CAPTURE_WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, CAPTURE_HEIGHT)
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
fontColor = (10,10,10)
lineThickness= 1
lineType = cv2.LINE_AA
pos = 30
do_preprocess = True

while True:
    t0 = time.time()
    ret, frame = cap.read()
    t_frame = time.time()
    gpu_frame.upload(frame)
    pos = 30
    string = f"Image resolution: {frame.shape}"
    cv2.putText(frame, string, (10, pos), font, fontScale, fontColor, lineThickness, lineType)
    pos += 20
    print(string, end='')
    string = f"Open frame time: {((t_frame - t0)*1000):.2f} ms"
    cv2.putText(frame, string, (10, pos), font, fontScale, fontColor, lineThickness, lineType)
    pos += 20
    print('\t'+string, end='')
    if not ret:
        continue

    # Preprocess image
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    t_color = time.time()
    string = f"Color transformation: {((t_color - t_frame)*1000):.2f} ms"
    cv2.putText(frame, string, (10, pos), font, fontScale, fontColor, lineThickness, lineType)
    pos += 20
    print('\t'+string, end='')
#    # img = cv2.resize(img, (224, 224))
    img = np.transpose(img, (2, 0, 1))
    img = img.astype(np.float32) / 255.0
    img = torch.from_numpy(img)
    img = img.unsqueeze(0)

    # Inference
    model.eval()
    with torch.no_grad():
        start = time.time()
        outputs = model(img)
        end = time.time()
        string = f"Inference time: {((end - start)*1000):.2f} ms"
        cv2.putText(frame, string, (10, pos), font, fontScale, fontColor, lineThickness, lineType)
        pos += 20
        print('\t'+string, end='')

    # Postprocess
    outputs = torch.nn.functional.softmax(outputs, dim=1)
    outputs = outputs.squeeze(0)
    outputs = outputs.tolist()
    idx = outputs.index(max(outputs))
    string = f"Predicted: {idx}-{labels[idx]}"
    cv2.putText(frame, string, (10, pos), font, fontScale, fontColor, lineThickness, lineType)
    pos += 20
    print('\t'+string, end='')

    # FPS
    t = time.time() - t0
    string = f"FPS: {1/t:.2f}"
    cv2.putText(frame, string, (10, pos), font, fontScale, fontColor, lineThickness, lineType)
    pos += 20
    print('\t'+string, end='')

    # Image shape
    string = f"Image shape: {img.shape}"
    cv2.putText(frame, string, (10, pos), font, fontScale, fontColor, lineThickness, lineType)
    pos += 20
    print('\t'+string, end='')
    print()

    # Display
#     cv2.imshow("frame", frame)
#     if cv2.waitKey(1) == ord('q'):
#         break


cap.release()
cv2.destroyAllWindows()



error: OpenCV(4.5.0) /opt/opencv-4.5.0/modules/core/src/cuda/gpu_mat.cu:116: error: (-217:Gpu API call) all CUDA-capable devices are busy or unavailable in function 'allocate'


In [29]:
cv2.cudacodec.createVideoReader("0")

error: OpenCV(4.5.0) /opt/opencv-4.5.0/modules/core/include/opencv2/core/private.cuda.hpp:112: error: (-213:The function/feature is not implemented) The called functionality is disabled for current build or platform in function 'throw_no_cuda'


In [35]:
cap.release()
cv2.destroyAllWindows()

In [None]:
cv2.cudacodec.