In [None]:
# Run detect.py with the trained .pt weights on everything under inference/images
# (flags as given: --conf 0.25, --img-size 1280).
!python detect.py --weights yolov7_230111.pt --conf 0.25 --img-size 1280 --source inference/images

In [None]:
!pip install --upgrade setuptools pip --user
#!pip install --ignore-installed PyYAML
#!pip install Pillow

!pip install onnx 
!pip install onnxruntime
!pip install protobuf<4.21.3
!pip install onnxruntime-gpu
!pip install onnx>=1.9.0
!pip install onnx-simplifier>=0.3.6 --user

In [None]:
# Run export.py on the trained .pt weights with the flags below.
# NOTE(review): presumably this is what produces the yolov7_230111.onnx file that the
# ONNX Runtime session in a later cell loads — confirm against export.py.
!python export.py --weights ./yolov7_230111.pt \
        --grid --end2end --simplify \
        --topk-all 20 --iou-thres 0.5 --conf-thres 0.35 \
        --img-size 640 640 --max-wh 640

In [1]:
# import cv2
# import time
# import requests
# import random
# import numpy as np
# import onnxruntime as ort
# from PIL import Image
# from pathlib import Path
# from collections import OrderedDict,namedtuple

import cv2
import time
import requests
import random
import numpy as np
import onnxruntime as ort
from PIL import Image
from pathlib import Path
from collections import OrderedDict,namedtuple


# Runtime configuration for the ONNX Runtime session used by the cells below.
cuda = True
w = "yolov7_230111.onnx"

# With cuda enabled the CUDA provider is listed first and the CPU provider second;
# otherwise only the CPU provider is requested.
if cuda:
    providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
else:
    providers = ['CPUExecutionProvider']
print(providers)
session = ort.InferenceSession(w, providers=providers)


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    """Resize ``im`` to fit inside ``new_shape`` keeping aspect ratio, then pad the rest.

    Args:
        im: Image array; only its first two dimensions (height, width) are inspected.
        new_shape: Target ``(height, width)``, or a single int for a square target.
        color: Constant border value passed to ``cv2.copyMakeBorder``.
        auto: When true, the padding is reduced modulo ``stride`` instead of filling
            the whole target shape.
        scaleup: When false, the scale ratio is capped at 1.0 (never enlarge).
        stride: Modulus used for the ``auto`` padding.

    Returns:
        ``(im, r, (dw, dh))``: the resized and padded image, the scale ratio that was
        applied, and the per-side width/height padding (half of the total padding).
    """
    shape = im.shape[:2]  # (height, width) of the input
    src_h, src_w = shape[0], shape[1]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Largest scale at which both dimensions still fit inside the target
    r = min(new_shape[0] / src_h, new_shape[1] / src_w)
    if not scaleup:
        r = min(r, 1.0)

    # Size after scaling, in cv2's (width, height) order
    new_unpad = (int(round(src_w * r)), int(round(src_h * r)))
    dw = new_shape[1] - new_unpad[0]
    dh = new_shape[0] - new_unpad[1]
    if auto:
        dw = np.mod(dw, stride)
        dh = np.mod(dh, stride)

    # Split the padding evenly between the two opposite sides
    dw, dh = dw / 2, dh / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The +/-0.1 nudge sends an odd leftover pixel to the bottom/right side
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, r, (dw, dh)


['CUDAExecutionProvider', 'CPUExecutionProvider']


In [2]:
def translate_input(cv2_img, img_shape = (640, 640)):
    """Turn an image read by OpenCV into the feed dict for the global ``session``.

    Args:
        cv2_img: Image array; it is converted with ``cv2.COLOR_BGR2RGB``.
        img_shape: Target shape forwarded to ``letterbox`` as ``new_shape``.

    Returns:
        ``(inp, outname, [ratio, dwdh, names, colors])`` where ``inp`` maps the
        session's first input name to the float32 array scaled by 1/255,
        ``outname`` lists the session's output names, and the last element carries
        the letterbox ratio/padding plus the class names and a freshly randomised
        colour per class.
    """
    names = ['headline', 'doc', 'cir_stamp', 'rec_stamp']

    # One random 3-value colour per class, drawn in class order
    colors = {}
    for name in names:
        colors[name] = [random.randint(0, 255) for _ in range(3)]

    rgb = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB)
    padded, ratio, dwdh = letterbox(rgb.copy(), new_shape=img_shape, auto=False)

    # Move the last axis to the front, prepend a batch axis, make the memory contiguous
    batched = np.expand_dims(padded.transpose((2, 0, 1)), 0)
    batched = np.ascontiguousarray(batched)

    im = batched.astype(np.float32)
    im /= 255

    outname = [node.name for node in session.get_outputs()]
    inname = [node.name for node in session.get_inputs()]

    return {inname[0]: im}, outname, [ratio, dwdh, names, colors]

In [3]:
import time

In [4]:
# Time one preprocessing + ONNX inference pass on a single sample image.
sample_path = r'inference/images/0E31BE6F-D4B6-4678-BAC5-323F94679D96_vpQbc_1670227956485.jpg'
cv2_img = cv2.imread(sample_path)

start_time = time.time()
inp, outname, translate_params = translate_input(cv2_img)
outputs = session.run(outname, inp)[0]  # first output of the session
elapsed = time.time() - start_time
print("done in:", elapsed)

done in: 4.538798809051514


In [5]:
def visualize_detection(cv2_img, translate_params, detections=None):
    """Draw detection boxes and labels on a copy of ``cv2_img``.

    Args:
        cv2_img: Image array the detections belong to; it is not modified.
        translate_params: ``[ratio, dwdh, names, colors]`` as returned by
            ``translate_input``.
        detections: Iterable of 7-value rows
            ``(batch_id, x0, y0, x1, y1, cls_id, score)``. When omitted, the
            notebook-global ``outputs`` is used, which is what this function read
            implicitly before the parameter was added.

    Returns:
        The annotated copy of ``cv2_img``.
    """
    if detections is None:
        # Backward-compatible fallback: previous callers relied on the global result.
        detections = outputs

    ratio, dwdh, names, colors = translate_params
    ori_images = [cv2_img.copy()]

    # (dw, dh) repeated so it lines up with (x0, y0, x1, y1); loop-invariant.
    padding = np.array(tuple(dwdh) * 2)

    for batch_id, x0, y0, x1, y1, cls_id, score in detections:
        image = ori_images[int(batch_id)]

        # Undo the letterbox: remove the padding, then divide by the scale ratio.
        box = np.array([x0, y0, x1, y1])
        box -= padding
        box /= ratio
        box = box.round().astype(np.int32).tolist()

        cls_id = int(cls_id)
        score = round(float(score), 3)
        name = names[cls_id]
        color = colors[name]
        label = name + ' ' + str(score)

        cv2.rectangle(image, box[:2], box[2:], color, 2)
        cv2.putText(image, label, (box[0], box[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.75, [225, 255, 255], thickness=2)

    return ori_images[0]
    # Image.fromarray(ori_images[0])
    # cv2.imwrite("output.jpg", ori_images[0])

In [6]:
import glob
# glob returns matches in arbitrary order; sort so that index-based slices of
# list_img (e.g. list_img[600:800]) select the same files on every run.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
list_img = sorted(glob.glob("D:/PPYData/image/*.*"))

In [7]:
# Number of paths collected in list_img
len(list_img)

994

In [8]:
# Run inference on images 600-799 of list_img and save an annotated preview for every
# image in which no 'cir_stamp' detection was found.
CIR_STAMP_CLS_ID = 2  # index of 'cir_stamp' in the names list built by translate_input

# cv2.imwrite fails silently when the target directory is missing, so create it up front.
Path("out_imgs").mkdir(parents=True, exist_ok=True)

for img_path in list_img[600:800]:
    # Path handles the running OS's separators; splitting on "\\" only worked on Windows.
    fname = Path(img_path).name
    cv2_img = cv2.imread(img_path)
    if cv2_img is None:
        # cv2.imread returns None for unreadable / non-image files instead of raising.
        print(f"skipping unreadable image: {img_path}")
        continue

    # ONNX inference
    inp, outname, translate_params = translate_input(cv2_img)
    outputs = session.run(outname, inp)[0]

    # Each row is (batch_id, x0, y0, x1, y1, cls_id, score); index 5 is the class id.
    include_cir_tamp = any(int(det[5]) == CIR_STAMP_CLS_ID for det in outputs)
    if not include_cir_tamp:
        vis_img = visualize_detection(cv2_img, translate_params)
        # fname keeps its original extension, so the saved file is "<name>.<ext>.jpg"
        # (same naming as before).
        cv2.imwrite(f"out_imgs/{fname}.jpg", vis_img)

KeyboardInterrupt: 

In [33]:
# Unpack the first row of the most recent `outputs` into its 7 fields and show the class id.
batch_id,x0,y0,x1,y1,cls_id,score = outputs[0]
print(cls_id)

1.0


In [1]:
import cv2
from inference import DocumentLayoutDetection

# Build the detector imported from the local `inference` module, pointing it at the
# exported ONNX file with cuda disabled.
# NOTE(review): presumably this wraps the session/letterbox/visualisation code from the
# earlier cells — confirm in inference.py.
documentLayoutDetection = DocumentLayoutDetection("yolov7_230111.onnx", cuda=False)

In [3]:
# Smoke-test the DocumentLayoutDetection wrapper on one image and save the annotated result.
# Forward slashes resolve on every OS (the backslash form only resolved on Windows) and
# match the path style used for the other sample image in this notebook.
test_img_path = "inference/images/1BA61ED3-7520-4163-AAA6-68FA8FAFC900_3Knkm_1670161583401.jpg"
test_img = cv2.imread(test_img_path)
if test_img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"could not read image: {test_img_path}")
res, translate_params = documentLayoutDetection.run_infer(test_img)
vis_img = documentLayoutDetection.visualize_detection(test_img, translate_params, res)
cv2.imwrite("test.jpg", vis_img)


True