## Torch Model

In [25]:
import cv2
import numpy as np
import argparse
import sys
import time

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging, check_img_size
# from torch import onnx
import onnx

img_size = (640, 640)

model = attempt_load("weights/yolov5s.pt", map_location=torch.device('cpu'))  # load FP32 model
delattr(model.model[-1], 'anchor_grid')
model.model[-1].anchor_grid=[torch.zeros(1)] * 3 # nl=3 number of detection layers
model.model[-1].export_cat = True
model.eval()
labels = model.names

# Checks
gs = int(max(model.stride))  # grid size (max stride)
img_size = [check_img_size(x, gs) for x in img_size]  # verify img_size are gs-multiples

# Input
img_path = "../../../thesis_exportable_module/images/test_images/Masked.png"

input_img = cv2.imread(img_path)
# input_img = cv2.resize(input_img, img_size)
# img = np.array(input_img.astype(np.float32))
mean = np.array([0.485, 0.456, 0.406]) * 255.0
scale = 1 / 255.0
std = [0.229, 0.224, 0.225]
img = cv2.dnn.blobFromImage(
    image=input_img,
    scalefactor=scale,
    size=(640, 640),  # img target size
    mean=mean,
    swapRB=True,  # BGR -> RGB
    crop=True  # center crop
)
print(img.shape)

# img = torch.zeros(1, 3, *img_size)  # image size(1,3,320,192) iDetection
# print(img.shape)

# Update model
for k, m in model.named_modules():
    m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
    if isinstance(m, models.common.Conv):  # assign export-friendly activations
        if isinstance(m.act, nn.Hardswish):
            m.act = Hardswish()
        elif isinstance(m.act, nn.SiLU):
            m.act = SiLU()
    # elif isinstance(m, models.yolo.Detect):
    #     m.forward = m.forward_export  # assign forward (optional)
    if isinstance(m, models.common.ShuffleV2Block):#shufflenet block nn.SiLU
        for i in range(len(m.branch1)):
            if isinstance(m.branch1[i], nn.SiLU):
                m.branch1[i] = SiLU()
        for i in range(len(m.branch2)):
            if isinstance(m.branch2[i], nn.SiLU):
                m.branch2[i] = SiLU()
    if isinstance(m, nn.Upsample):
        m.recompute_scale_factor = None
y = model(img) 

Fusing layers... 
(1, 3, 640, 640)


TypeError: conv2d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)


## CV2 Inference

In [14]:
import cv2

full_model_path = "./weights/yolov5s.onnx"
opencv_net = cv2.dnn.readNetFromONNX(full_model_path)

In [22]:
import numpy as np
img_path = "../../../thesis_exportable_module/images/test_images/Masked.png"

input_img = cv2.imread(img_path)
input_img = input_img.astype(np.float32)

mean = np.array([0.485, 0.456, 0.406]) * 255.0
scale = 1 / 255.0
std = [0.229, 0.224, 0.225]

input_blob = cv2.dnn.blobFromImage(
    image=input_img,
    scalefactor=scale,
    size=(640, 640),  # img target size
    mean=mean,
    swapRB=True,  # BGR -> RGB
    crop=True  # center crop
)

print("input blob", input_blob.shape)

# set OpenCV DNN input
opencv_net.setInput(input_blob)
# OpenCV DNN inference
out = opencv_net.forward()
print("OpenCV DNN prediction: \n")
print("* shape: ", out.shape)
# get the predicted class ID
imagenet_class_id = np.argmax(out)

print(out)
print(out.shape)
# get confidence
# confidence = out[0][imagenet_class_id]
# print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id]))
# print("* confidence: {:.4f}".format(confidence))

input blob (1, 3, 640, 640)
OpenCV DNN prediction: 

* shape:  (1, 25200, 85)
[[[     3.0206      3.2279      8.3343 ...   0.0026812  0.00083698   0.0037202]
  [     12.004      3.8817      23.399 ...   0.0025475   0.0011141   0.0035167]
  [     18.029      3.9822      29.994 ...   0.0028336   0.0010802   0.0036389]
  ...
  [        571      602.34      138.69 ...  0.00093788  0.00089743  0.00076715]
  [     580.93      605.21      118.05 ...   0.0011593  0.00081392  0.00078583]
  [     617.62       617.8      131.52 ...   0.0016312  0.00099089   0.0010197]]]
(1, 25200, 85)


In [17]:
# inference of prediction
from utils.general import non_max_suppression_face
pred = out[0]
img_size = 640
conf_thres = 0.6
iou_thres = 0.5
imgsz=(640, 640)
pred = non_max_suppression_face(pred, conf_thres, iou_thres)
print(len(pred[0]), 'face' if len(pred[0]) == 1 else 'faces')

IndexError: tuple index out of range