New YOLOv5 support #19

Closed · wants to merge 4 commits
26 changes: 15 additions & 11 deletions README.md
@@ -34,17 +34,9 @@ To use the Roboflow Inference API as your detection engine:
Upload, annotate, and train your model on Roboflow with [Roboflow Train](https://docs.roboflow.com/train).
Your model will be hosted on an inference URL.

To use YOLOv5 as your detection engine:

Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/)

The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ)

The YOLOv5 implementation is currently compatible with this commit hash of YOLOv5 `886f1c03d839575afecb059accf74296fad395b6`

## Performing Object Tracking

###Clone repositories
### Clone repositories

```
git clone https://github.com/roboflow-ai/zero-shot-object-tracking
```
@@ -77,13 +69,25 @@ python clip_object_tracker.py --source data/video/fish.mp4 --url https://detect.

**NOTE:** you must provide a valid API key from [Roboflow](https://docs.roboflow.com).

###Run with YOLOv5


### Run with the latest YOLOv5
Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/).
The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ) and is currently compatible with YOLOv5 at commit `886f1c03d839575afecb059accf74296fad395b6`.
```bash
python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/fish.mp4 --detection-engine yolov5 --info
```
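
If you need that exact commit locally, you can pin your YOLOv5 clone to it (a minimal sketch using standard git commands):

```bash
# Pin a local YOLOv5 clone to the compatible commit (assumed workflow):
git clone https://github.com/ultralytics/yolov5.git
cd yolov5
git checkout 886f1c03d839575afecb059accf74296fad395b6
```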

###Run with YOLOv4
### Run with any model on Torch Hub
This method allows you to use the latest YOLOv5 release found on [Torch Hub](https://pytorch.org/hub/ultralytics_yolov5/). The `github` flag sets the model repo path on Torch Hub, `ultralytics/yolov5` by default. The `type` flag sets the model variant, e.g. `yolov5s`, `yolov5l`, or `custom`; the default is `yolov5s`. The `sourceType` flag determines where the model repo is loaded from, either `github` or `local` (the default is `github`); with a `local` source you must provide pretrained weights yourself.
```bash
python clip_object_tracker.py --weights models/yolov5s.pt --source "data/video/cars.mp4" --detection-engine hub --github ultralytics/yolov5 --type yolov5s --sourceType github --info
```
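
Under the hood, these flags presumably map onto a `torch.hub.load` call along these lines (a minimal sketch; see `tools/hub.py` for the actual `TorchHubEngine` behavior, and note the image path below is hypothetical):

```python
import torch

# Assumed mapping of the CLI flags onto torch.hub.load (sketch only):
model = torch.hub.load(
    "ultralytics/yolov5",  # --github: repo path on Torch Hub
    "yolov5s",             # --type: model variant, or "custom" with a weights path
    source="github",       # --sourceType: load the repo from "github" or "local"
)
model.conf = 0.4           # --confidence: detection confidence threshold
model.iou = 0.6            # --overlap: NMS IoU threshold

results = model("data/images/example.jpg")  # hypothetical image path
print(results.xywh[0])     # boxes as (cx, cy, w, h, conf, class) per image
```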

### Run with YOLOv4
To use YOLOv4 for object detection you will need pretrained weights (a `.weights` file), a model config for your weights (`.cfg`), and a class names file (`.names`). Test weights can be found at https://github.com/AlexeyAB/darknet: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights), [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg).
```
python clip_object_tracker.py --weights yolov4.weights --cfg yolov4.cfg --names coco.names --source data/video/cars.mp4 --detection-engine yolov4 --info
```
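
The weights and config referenced above can be fetched directly (a sketch using standard download commands; save locations are your choice):

```bash
# Download the YOLOv4 test weights and matching config (URLs from the links above):
wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights
wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg
```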
247 changes: 131 additions & 116 deletions clip_object_tracker.py
@@ -2,30 +2,33 @@
import time
from pathlib import Path

import clip



import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import numpy as np

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import xyxy2xywh, xywh2xyxy, \
from tools.datasets import LoadStreams, LoadImages
from tools.general import xyxy2xywh, xywh2xyxy, \
strip_optimizer, set_logging, increment_path, scale_coords
from utils.plots import plot_one_box
from utils.torch_utils import select_device, time_synchronized
from utils.roboflow import predict_image
from tools.plots import plot_one_box
from tools.torch_utils import select_device, time_synchronized
from tools.roboflow import predict_image

# deep sort imports
from deep_sort import preprocessing, nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_clip_detections as gdet

from utils.yolov5 import Yolov5Engine
from utils.yolov4 import Yolov4Engine
import clip

from tools.yolov4 import Yolov4Engine
from tools.hub import TorchHubEngine


classes = []

@@ -43,6 +46,7 @@ def update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img,
class_num = track.class_num
bbox = xyxy
class_name = names[int(class_num)] if opt.detection_engine == "yolov5" else class_num
print("track bbox",bbox)
if opt.info:
print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
@@ -98,19 +102,22 @@ def detect(save_img=False):
# calculate cosine distance metric
metric = nn_matching.NearestNeighborDistanceMetric(
"cosine", max_cosine_distance, nn_budget)

# load yolov5 model here
if opt.detection_engine == "yolov5":
yolov5_engine = Yolov5Engine(opt.weights, device, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half)
global names
names = yolov5_engine.get_names()
elif opt.detection_engine == "yolov4":
yolov4_engine = Yolov4Engine(opt.weights, opt.cfg, device, opt.names, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half)

source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
# load object detection model here if necessary
if opt.detection_engine == "yolov4":
yolov4_engine = Yolov4Engine(opt.weights, opt.cfg, device, opt.names, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half)
if opt.detection_engine == "hub":
print("initializing torch hub engine")
hub_engine = TorchHubEngine(opt.github, opt.type, opt.weights, opt.sourceType, opt.classes, opt.confidence, opt.overlap, opt.augment, imgsz)
if opt.detection_engine == "yolov5":
print("initializing torch hub engine for yolov5")
hub_engine = TorchHubEngine("ultralytics/yolov5", opt.type, opt.weights, "github", opt.classes, opt.confidence, opt.overlap, opt.augment,imgsz)
opt.detection_engine = "hub"
# initialize tracker
tracker = Tracker(metric)

source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://'))

@@ -137,8 +144,8 @@

frame_count = 0
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
if opt.detection_engine == "yolov5":
_ = yolov5_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once
if opt.detection_engine == "hub":
_ = hub_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once
for path, img, im0s, vid_cap in dataset:

img = torch.from_numpy(img).to(device)
@@ -155,108 +162,110 @@
if opt.detection_engine == "roboflow":
pred, classes = predict_image(im0, opt.api_key, opt.url, opt.confidence, opt.overlap, frame_count)
pred = [torch.tensor(pred)]
elif opt.detection_engine == "yolov5":
print("yolov5 inference")
pred = yolov5_engine.infer(img)
else:
elif opt.detection_engine == "yolov4":
print("yolov4 inference {}".format(im0.shape))
pred = yolov4_engine.infer(im0)
pred, classes = yolov4_engine.postprocess(pred, im0.shape)
pred = [torch.tensor(pred)]
elif opt.detection_engine == "hub":
print("torch hub inference")
pred, classes = hub_engine.infer(im0)

t2 = time_synchronized()
# Process detections
for i, det in enumerate(pred): # detections per image
#moved up to roboflow inference
"""if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)"""

p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + \
('' if dataset.mode == 'image' else f'_{frame}') # img.txt

# normalization gain whwh
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
if len(det):

print("\n[Detections]")
if opt.detection_engine == "roboflow":
# Print results
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

trans_bboxes = det[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]

elif opt.detection_engine == "yolov4":

# Print results
# Rescale boxes from img_size to im0 size
#det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round()
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string


# Transform bboxes from tlbr to tlwh
trans_bboxes = det[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]

"""for idx, box in enumerate(bboxes):
plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx],
color=get_color_for(classes[idx]), line_thickness=opt.thickness)"""

print(s)
else:

# Print results
# Rescale boxes from img_size to im0 size

det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f'{n} {names[int(c)]}s, ' # add to string

# Transform bboxes from tlbr to tlwh
trans_bboxes = det[:, :4].clone()
trans_bboxes[:, 2:] -= trans_bboxes[:, :2]
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]
class_nums = det[:, -1]
classes = class_nums

print(s)



# encode yolo detections and feed to tracker
features = encoder(im0, bboxes)
detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip(
bboxes, confs, classes, features)]

# run non-maxima suppression
boxs = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
class_nums = np.array([d.class_num for d in detections])
indices = preprocessing.non_max_suppression(
boxs, class_nums, nms_max_overlap, scores)
detections = [detections[i] for i in indices]

# Call the tracker
tracker.predict()
tracker.update(detections)

# update tracks
update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn)

"""if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)"""

p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + \
('' if dataset.mode == 'image' else f'_{frame}') # img.txt

# normalization gain whwh
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
if len(pred):

print("\n[Detections]")
if opt.detection_engine == "roboflow":
# Print results
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

trans_bboxes = pred[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

elif opt.detection_engine == "yolov4":

# Print results
# Rescale boxes from img_size to im0 size
#det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round()
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string


# Transform bboxes from tlbr to tlwh
trans_bboxes = pred[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

"""for idx, box in enumerate(bboxes):
plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx],
color=get_color_for(classes[idx]), line_thickness=opt.thickness)"""

print(s)
elif opt.detection_engine == "hub":

# Print results
# Rescale boxes from img_size to im0 size

xs = im0.shape[1]/imgsz
ys = im0.shape[0]/imgsz
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

# Transform bboxes from center-format xywh to tlwh and scale back to im0 size
trans_bboxes = pred[:, :4].clone().cpu().numpy()
trans_bboxes = [[(tb[0]*xs)-(tb[2]*xs)/2, (tb[1]*ys)-(tb[3]*ys)/2, tb[2]*xs, tb[3]*ys] for tb in trans_bboxes if tb[2] > 0 and tb[3] > 0]
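# (Above, each tb is (cx, cy, w, h) in imgsz-scale pixels: xs/ys rescale to the
# original im0 frame, subtracting half the scaled width/height moves the box
# center to its top-left corner, and boxes with non-positive w or h are dropped.)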
trans_bboxes = torch.tensor(trans_bboxes)
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

print(s)

# moved up to roboflow inference

# encode yolo detections and feed to tracker
features = encoder(im0, bboxes)
detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip(
bboxes, confs, classes, features)]

# run non-maxima suppression
boxs = np.array([d.tlwh for d in detections])
print(boxs)
scores = np.array([d.confidence for d in detections])
class_nums = np.array([d.class_num for d in detections])
indices = preprocessing.non_max_suppression(
boxs, class_nums, nms_max_overlap, scores)
detections = [detections[i] for i in indices]


# Call the tracker
tracker.predict()
tracker.update(detections)

# update tracks
update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn)

# Print time (inference + NMS)
print(f'Done. ({t2 - t1:.3f}s)')
@@ -297,7 +306,7 @@
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str,
default='yolov5s.pt', help='model.pt path(s)')
default=None, help='model.pt path(s)')
parser.add_argument('--cfg', type=str,
default='yolov4.cfg', help='yolov4 model cfg file path')
parser.add_argument('--names', type=str,
@@ -343,6 +352,12 @@ def detect(save_img=False):
help='Maximum size of the appearance descriptors gallery. If None, no budget is enforced.')
parser.add_argument('--api_key', default=None,
help='Roboflow API Key.')
parser.add_argument('--github', default="ultralytics/yolov5",
help='Torch hub github. ex: ultralytics/yolov5')
parser.add_argument('--type', default="yolov5s",
help='Torch hub model type. ex: yolov5s, custom')
parser.add_argument('--sourceType', default="github",
help='where to load the model repo from. ex: github, local')
parser.add_argument('--url', default=None,
help='Roboflow Model URL.')
parser.add_argument('--info', action='store_true',
2 changes: 1 addition & 1 deletion requirements.txt
@@ -22,4 +22,4 @@ pandas

# clip
ftfy==6.0.3
regex==2.5.86
regex
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion utils/autoanchor.py → tools/autoanchor.py
@@ -95,7 +95,7 @@ def print_results(k):
if isinstance(path, str): # *.yaml file
with open(path) as f:
data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
from utils.datasets import LoadImagesAndLabels
from utils import LoadImagesAndLabels
dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
else:
dataset = path # dataset
4 changes: 2 additions & 2 deletions utils/datasets.py → tools/datasets.py
100755 → 100644
@@ -19,8 +19,8 @@
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.general import xyxy2xywh, xywh2xyxy, clean_str
from utils.torch_utils import torch_distributed_zero_first
from tools.general import xyxy2xywh, xywh2xyxy, clean_str
from tools.torch_utils import torch_distributed_zero_first

# Parameters
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'