opencv · obtx · Jul 19, 2022 · Jul 29, 2022 · Jul 29, 2022 · Jul 31, 2022
diff --git a/multitask_centernet/LICENSE b/multitask_centernet/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Shiqi Yu <shiqi.yu@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/multitask_centernet/README.md b/multitask_centernet/README.md
@@ -0,0 +1,32 @@
+# MCN
+
+Multitask-Centernet (MCN) is a multi-task network (MTN). Studies have shown that training with multiple tasks linked to each other can sometimes even improve the quality of training and prediction compared to single-task learning (STL). When the network receives the same type of input, it is likely to extract similar features. In this case, a shared backbone can take advantage of the similar semantics of these input features.
+
+Notes:
+- Model source: [here](https://github.com/ShiqiYu/libfacedetection.train/blob/a61a428929148171b488f024b5d6774f93cdbc13/tasks/task1/onnx/yunet.onnx).
+- For details on training this model, please visit my home page.
+- This ONNX model has fixed input shape, but OpenCV DNN infers on the exact shape of input image. See https://github.com/opencv/opencv_zoo/issues/63 for more information.
+
+## Demo
+
+Run the following command to try the demo:
+```shell
+# detect on the default sample image
+python demo.py
+# detect on an image
+python demo.py --image-path /path/to/image
+```
+
+### Example outputs
+
+![detection and pose estimation demo](./examples/ori_vis_0.png)
+
+![semantic segmentation demo](./examples/ori_vis_masks_0.png)
+
+## License
+
+All files in this directory are licensed under [MIT License](./LICENSE).
+
+## Reference
+
+- https://arxiv.org/abs/2108.05060v2
diff --git a/multitask_centernet/demo.py b/multitask_centernet/demo.py
@@ -0,0 +1,73 @@
+from data import MultiCOCO
+from models import MulCenternet
+import argparse
+import torch
+import torch.nn as nn
+import os
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+import cv2
+from utils import *
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Test single image.')
+    parser.add_argument('--image-path', type=str, default='../datasets/coco2017/val2017/000000000785.jpg')
+    parser.add_argument('--save-root', type=str, default='./inference/')
+    parser.add_argument('--checkpoint_path', type=str, default='./exps/epoch_3.pkl')
+    return parser.parse_args()
+
+
+def main(args):
+    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
+
+    os.makedirs(args.save_root, exist_ok=True)
+    image_name = args.image_path.rsplit('/', 2)[-1]
+    image_path = os.path.join(args.save_root, image_name)
+
+    transforms = A.Compose([
+        A.Resize(512, 512),
+        A.Normalize(mean=[0.408, 0.447, 0.470], std=[0.289, 0.274, 0.278]),
+        ToTensorV2()
+    ])
+    image = cv2.imread(args.image_path)
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+    augmented = transforms(image=image)
+    model = torch.load(args.checkpoint_path)
+    if torch.cuda.is_available():
+        model = model.cuda()
+        image = augmented['image'].cuda().unsqueeze(0)
+
+    model.eval()
+    with torch.no_grad():
+        output = model(image)
+        for k, v in output.items():
+            print(k, v.shape, v.dtype)
+
+    detection = ctdet_decode(
+        output["heatmap"].sigmoid_(),
+        output["width_height"],
+        reg=output["bbox_offset"],
+    )
+    detection = detection.cpu().detach().squeeze()
+
+    image = cv2.imread(args.image_path)
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    t = A.Compose([
+        A.Resize(512, 512)
+    ])
+    image = t(image=image)['image']
+
+    for d in detection:
+        x1, y1, x2, y2, score, cls = tuple(d)
+        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 1)
+
+    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    cv2.imwrite(image_path, image)
+    print('Image has been saved to ', image_path)
+
+
+# Script entry point: parse the command line, then run the single-image demo.
+if __name__ == '__main__':
+    args = parse_args()
+    main(args)
diff --git a/multitask_centernet/demo_new.py b/multitask_centernet/demo_new.py
@@ -0,0 +1,25 @@
+import cv2
+import onnx
+import numpy as np
+from utils.datasets import letterbox
+from utils.general import non_max_suppression_kpt, xywh2xyxy
+from utils.plots import output_to_keypoint, plot_skeleton_kpts, plot_one_box
+
+# Relative path of the ONNX model file.
+onnx_name = 'w6-pose.onnx'
+# Validate the model with the ONNX checker before loading the same file into OpenCV DNN.
+model_ = onnx.load(onnx_name)
+onnx.checker.check_model(model_)
+model = cv2.dnn.readNetFromONNX(onnx_name)
+print("loading complete")
+
+
+# Read the image and preprocess it
+image = cv2.imread('./test_data/2.jpg')
+image = letterbox(image, 960, stride=64, auto=True)[0]
+# print(image.shape)
+# NOTE(review): the letterboxed image is resized again to 640x640 and
+# blobFromImage is called with its default arguments -- confirm this matches
+# the preprocessing the model expects.
+blob = cv2.dnn.blobFromImage(cv2.resize(image, (640, 640)))
+# print(blob.shape)
+
+# Inference
+model.setInput(blob)
+output = model.forward()
+print(output.shape)
diff --git a/multitask_centernet/demo_onnx.py b/multitask_centernet/demo_onnx.py
@@ -0,0 +1,126 @@
+import os
+import numpy as np
+import cv2
+import argparse
+import onnxruntime
+from tqdm import tqdm
+
+# The command line is parsed at module level; the functions below read the
+# resulting ``args`` namespace.
+parser = argparse.ArgumentParser()
+# ONNX model file to run.
+parser.add_argument("--model-path", type=str, default="./best.onnx")
+# Text file that is read line by line, one image path per line.
+parser.add_argument("--img-path", type=str, default="./sample_ips.txt")
+# Directory that receives the annotated images and the box dumps.
+parser.add_argument("--dst-path", type=str, default="./sample_ops_onnxrt")
+args = parser.parse_args()
+
+
+# Box/label colour per class id. post_process reverses each tuple before
+# handing it to OpenCV.
+_CLASS_COLOR_MAP = [
+    (0, 0, 255) , # Person (blue).
+    (255, 0, 0) ,  # Bear (red).
+    (0, 255, 0) ,  # Tree (lime).
+    (255, 0, 255) ,  # Bird (fuchsia).
+    (0, 255, 255) ,  # Sky (aqua).
+    (255, 255, 0) ,  # Cat (yellow).
+]
+
+# Colour table indexed by pose_limb_color and pose_kpt_color below.
+palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
+                    [230, 230, 0], [255, 153, 255], [153, 204, 255],
+                    [255, 102, 255], [255, 51, 255], [102, 178, 255],
+                    [51, 153, 255], [255, 153, 153], [255, 102, 102],
+                    [255, 51, 51], [153, 255, 153], [102, 255, 102],
+                    [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0],
+                    [255, 255, 255]])
+
+# Limbs as pairs of 1-based keypoint indices (plot_skeleton_kpts subtracts 1).
+skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12],
+            [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3],
+            [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
+
+# One colour per entry of ``skeleton`` (19 limbs) and one per keypoint (17).
+pose_limb_color = palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
+pose_kpt_color = palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
+# Radius, in pixels, of the filled circle drawn for each keypoint.
+radius = 5
+
+def read_img(img_file, img_mean=127.5, img_scale=1/127.5):
+    img = cv2.imread(img_file)[:, :, ::-1]
+    img = cv2.resize(img, (640,640), interpolation=cv2.INTER_LINEAR)
+    img = (img - img_mean) * img_scale
+    img = np.asarray(img, dtype=np.float32)
+    img = np.expand_dims(img,0)
+    img = img.transpose(0,3,1,2)
+    return img
+
+
+def model_inference(model_path=None, input=None):
+    #onnx_model = onnx.load(args.model_path)
+    session = onnxruntime.InferenceSession(model_path, None)
+    input_name = session.get_inputs()[0].name
+    output = session.run([], {input_name: input})
+    return output
+
+
+def model_inference_image_list(model_path, img_path=None, mean=None, scale=None, dst_path=None):
+    os.makedirs(args.dst_path, exist_ok=True)
+    img_file_list = list(open(img_path))
+    pbar = enumerate(img_file_list)
+    max_index = 20
+    pbar = tqdm(pbar, total=min(len(img_file_list), max_index))
+    for img_index, img_file  in pbar:
+        pbar.set_description("{}/{}".format(img_index, len(img_file_list)))
+        img_file = img_file.rstrip()
+        input = read_img(img_file, mean, scale)
+        output = model_inference(model_path, input)
+        dst_file = os.path.join(dst_path, os.path.basename(img_file))
+        post_process(img_file, dst_file, output[0], score_threshold=0.3)
+
+
+def post_process(img_file, dst_file, output, score_threshold=0.3):
+    """
+    Draw bounding boxes on the input image. Dump boxes in a txt file.
+    """
+    det_bboxes, det_scores, det_labels, kpts = output[:, 0:4], output[:, 4], output[:, 5], output[:, 6:]
+    img = cv2.imread(img_file)
+    w, h = img.shape[0], img.shape[1]
+    img = cv2.resize(img, (640,640), interpolation=cv2.INTER_LINEAR)
+    #To generate color based on det_label, to look into the codebase of Tensorflow object detection api.
+    dst_txt_file = dst_file.replace('png', 'txt')
+    f = open(dst_txt_file, 'wt')
+    for idx in range(len(det_bboxes)):
+        det_bbox = det_bboxes[idx]
+        kpt = kpts[idx]
+        if det_scores[idx]>0:
+            f.write("{:8.0f} {:8.5f} {:8.5f} {:8.5f} {:8.5f} {:8.5f}\n".format(det_labels[idx], det_scores[idx], det_bbox[1], det_bbox[0], det_bbox[3], det_bbox[2]))
+        if det_scores[idx]>score_threshold:
+            color_map = _CLASS_COLOR_MAP[int(det_labels[idx])]
+            img = cv2.rectangle(img, (det_bbox[0], det_bbox[1]), (det_bbox[2], det_bbox[3]), color_map[::-1], 2)
+            cv2.putText(img, "id:{}".format(int(det_labels[idx])), (int(det_bbox[0]+5),int(det_bbox[1])+15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color_map[::-1], 2)
+            cv2.putText(img, "score:{:2.1f}".format(det_scores[idx]), (int(det_bbox[0] + 5), int(det_bbox[1]) + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color_map[::-1], 2)
+            plot_skeleton_kpts(img, kpt)
+    img = cv2.resize(img, (h, w), interpolation=cv2.INTER_LINEAR)
+    cv2.imwrite(dst_file, img)
+    f.close()
+
+
+def plot_skeleton_kpts(im, kpts, steps=3):
+    num_kpts = len(kpts) // steps
+    #plot keypoints
+    for kid in range(num_kpts):
+        r, g, b = pose_kpt_color[kid]
+        x_coord, y_coord = kpts[steps * kid], kpts[steps * kid + 1]
+        conf = kpts[steps * kid + 2]
+        if conf > 0.5: #Confidence of a keypoint has to be greater than 0.5
+            cv2.circle(im, (int(x_coord), int(y_coord)), radius, (int(r), int(g), int(b)), -1)
+    #plot skeleton
+    for sk_id, sk in enumerate(skeleton):
+        r, g, b = pose_limb_color[sk_id]
+        pos1 = (int(kpts[(sk[0]-1)*steps]), int(kpts[(sk[0]-1)*steps+1]))
+        pos2 = (int(kpts[(sk[1]-1)*steps]), int(kpts[(sk[1]-1)*steps+1]))
+        conf1 = kpts[(sk[0]-1)*steps+2]
+        conf2 = kpts[(sk[1]-1)*steps+2]
+        if conf1>0.5 and conf2>0.5: # For a limb, both the keypoint confidence must be greater than 0.5
+            cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2)
+
+
+def main():
+    model_inference_image_list(model_path=args.model_path, img_path=args.img_path,
+                               mean=0.0, scale=0.00392156862745098,
+                               dst_path=args.dst_path)
+
+# Script entry point.
+if __name__== "__main__":
+    main()
diff --git a/multitask_centernet/examples/ori_vis_0.png b/multitask_centernet/examples/ori_vis_0.png
diff --git a/multitask_centernet/examples/ori_vis_5.png b/multitask_centernet/examples/ori_vis_5.png
diff --git a/multitask_centernet/examples/ori_vis_masks_0.png b/multitask_centernet/examples/ori_vis_masks_0.png
diff --git a/multitask_centernet/examples/ori_vis_masks_5.png b/multitask_centernet/examples/ori_vis_masks_5.png
diff --git a/multitask_centernet/models/__init__.py b/multitask_centernet/models/__init__.py
@@ -0,0 +1 @@
+# init