New YOLOv5 support #19

Closed · wants to merge 4 commits
26 changes: 15 additions & 11 deletions README.md
@@ -34,17 +34,9 @@ To use the Roboflow Inference API as your detection engine:
Upload, annotate, and train your model on Roboflow with [Roboflow Train](https://docs.roboflow.com/train).
Your model will be hosted on an inference URL.

To use YOLOv5 as your detection engine:

Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/)

The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ)

The YOLOv5 implementation is currently compatible with this commit hash of YOLOv5 `886f1c03d839575afecb059accf74296fad395b6`

## Performing Object Tracking

###Clone repositories
### Clone repositories

```
git clone https://github.com/roboflow-ai/zero-shot-object-tracking
```
@@ -77,13 +69,25 @@ python clip_object_tracker.py --source data/video/fish.mp4 --url https://detect.

**NOTE:** you must provide a valid API key from [Roboflow](https://docs.roboflow.com).

###Run with YOLOv5


### Run with the latest YOLOv5
Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/).
The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ) and is currently compatible with YOLOv5 at commit `886f1c03d839575afecb059accf74296fad395b6`.
```bash
python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/fish.mp4 --detection-engine yolov5 --info
```
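
If you need that exact commit locally, you can pin your YOLOv5 clone to it (a minimal sketch using standard git commands):

```bash
# Pin a local YOLOv5 clone to the compatible commit (assumed workflow):
git clone https://github.com/ultralytics/yolov5.git
cd yolov5
git checkout 886f1c03d839575afecb059accf74296fad395b6
```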

###Run with YOLOv4
### Run with any model on Torch Hub
This method allows you to use the latest YOLOv5 release found on [Torch Hub](https://pytorch.org/hub/ultralytics_yolov5/). The `github` flag sets the model repo path on Torch Hub, `ultralytics/yolov5` by default. The `type` flag sets the model variant, e.g. `yolov5s`, `yolov5l`, or `custom`; the default is `yolov5s`. The `sourceType` flag determines where the model repo is loaded from, either `github` or `local` (the default is `github`); with a `local` source you must provide pretrained weights yourself.
```bash
python clip_object_tracker.py --weights models/yolov5s.pt --source "data/video/cars.mp4" --detection-engine hub --github ultralytics/yolov5 --type yolov5s --sourceType github --info
```
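
Under the hood, these flags presumably map onto a `torch.hub.load` call along these lines (a minimal sketch; see `tools/hub.py` for the actual `TorchHubEngine` behavior, and note the image path below is hypothetical):

```python
import torch

# Assumed mapping of the CLI flags onto torch.hub.load (sketch only):
model = torch.hub.load(
    "ultralytics/yolov5",  # --github: repo path on Torch Hub
    "yolov5s",             # --type: model variant, or "custom" with a weights path
    source="github",       # --sourceType: load the repo from "github" or "local"
)
model.conf = 0.4           # --confidence: detection confidence threshold
model.iou = 0.6            # --overlap: NMS IoU threshold

results = model("data/images/example.jpg")  # hypothetical image path
print(results.xywh[0])     # boxes as (cx, cy, w, h, conf, class) per image
```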

### Run with YOLOv4
To use YOLOv4 for object detection you will need pretrained weights (a `.weights` file), a model config for your weights (`.cfg`), and a class names file (`.names`). Test weights can be found at https://github.com/AlexeyAB/darknet: [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights), [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg).
```
python clip_object_tracker.py --weights yolov4.weights --cfg yolov4.cfg --names coco.names --source data/video/cars.mp4 --detection-engine yolov4 --info
```
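
The weights and config referenced above can be fetched directly (a sketch using standard download commands; save locations are your choice):

```bash
# Download the YOLOv4 test weights and matching config (URLs from the links above):
wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights
wget https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg
```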
247 changes: 131 additions & 116 deletions clip_object_tracker.py
@@ -2,30 +2,33 @@
import time
from pathlib import Path

import clip



import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import numpy as np

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import xyxy2xywh, xywh2xyxy, \
from tools.datasets import LoadStreams, LoadImages
from tools.general import xyxy2xywh, xywh2xyxy, \
strip_optimizer, set_logging, increment_path, scale_coords
from utils.plots import plot_one_box
from utils.torch_utils import select_device, time_synchronized
from utils.roboflow import predict_image
from tools.plots import plot_one_box
from tools.torch_utils import select_device, time_synchronized
from tools.roboflow import predict_image

# deep sort imports
from deep_sort import preprocessing, nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_clip_detections as gdet

from utils.yolov5 import Yolov5Engine
from utils.yolov4 import Yolov4Engine
import clip

from tools.yolov4 import Yolov4Engine
from tools.hub import TorchHubEngine


classes = []

@@ -43,6 +46,7 @@ def update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img,
class_num = track.class_num
bbox = xyxy
class_name = names[int(class_num)] if opt.detection_engine == "yolov5" else class_num
print("track bbox",bbox)
if opt.info:
print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
@@ -98,19 +102,22 @@ def detect(save_img=False):
# calculate cosine distance metric
metric = nn_matching.NearestNeighborDistanceMetric(
"cosine", max_cosine_distance, nn_budget)

# load yolov5 model here
if opt.detection_engine == "yolov5":
yolov5_engine = Yolov5Engine(opt.weights, device, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half)
global names
names = yolov5_engine.get_names()
elif opt.detection_engine == "yolov4":
yolov4_engine = Yolov4Engine(opt.weights, opt.cfg, device, opt.names, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half)

source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
# load object detection model here if necessary
if opt.detection_engine == "yolov4":
yolov4_engine = Yolov4Engine(opt.weights, opt.cfg, device, opt.names, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half)
if opt.detection_engine == "hub":
print("initializing torch hub engine")
hub_engine = TorchHubEngine(opt.github, opt.type, opt.weights, opt.sourceType, opt.classes, opt.confidence, opt.overlap, opt.augment, imgsz)
if opt.detection_engine == "yolov5":
print("initializing torch hub engine for yolov5")
hub_engine = TorchHubEngine("ultralytics/yolov5", opt.type, opt.weights, "github", opt.classes, opt.confidence, opt.overlap, opt.augment,imgsz)
opt.detection_engine = "hub"
# initialize tracker
tracker = Tracker(metric)

source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://'))

@@ -137,8 +144,8 @@

frame_count = 0
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
if opt.detection_engine == "yolov5":
_ = yolov5_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once
if opt.detection_engine == "hub":
_ = hub_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once
for path, img, im0s, vid_cap in dataset:

img = torch.from_numpy(img).to(device)
@@ -155,108 +162,110 @@
if opt.detection_engine == "roboflow":
pred, classes = predict_image(im0, opt.api_key, opt.url, opt.confidence, opt.overlap, frame_count)
pred = [torch.tensor(pred)]
elif opt.detection_engine == "yolov5":
print("yolov5 inference")
pred = yolov5_engine.infer(img)
else:
elif opt.detection_engine == "yolov4":
print("yolov4 inference {}".format(im0.shape))
pred = yolov4_engine.infer(im0)
pred, classes = yolov4_engine.postprocess(pred, im0.shape)
pred = [torch.tensor(pred)]
elif opt.detection_engine == "hub":
print("torch hub inference")
pred, classes = hub_engine.infer(im0)

t2 = time_synchronized()
# Process detections
for i, det in enumerate(pred): # detections per image
#moved up to roboflow inference
"""if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)"""

p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + \
('' if dataset.mode == 'image' else f'_{frame}') # img.txt

# normalization gain whwh
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
if len(det):

print("\n[Detections]")
if opt.detection_engine == "roboflow":
# Print results
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

trans_bboxes = det[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]

elif opt.detection_engine == "yolov4":

# Print results
# Rescale boxes from img_size to im0 size
#det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round()
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string


# Transform bboxes from tlbr to tlwh
trans_bboxes = det[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]

"""for idx, box in enumerate(bboxes):
plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx],
color=get_color_for(classes[idx]), line_thickness=opt.thickness)"""

print(s)
else:

# Print results
# Rescale boxes from img_size to im0 size

det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f'{n} {names[int(c)]}s, ' # add to string

# Transform bboxes from tlbr to tlwh
trans_bboxes = det[:, :4].clone()
trans_bboxes[:, 2:] -= trans_bboxes[:, :2]
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]
class_nums = det[:, -1]
classes = class_nums

print(s)



# encode yolo detections and feed to tracker
features = encoder(im0, bboxes)
detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip(
bboxes, confs, classes, features)]

# run non-maxima suppression
boxs = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
class_nums = np.array([d.class_num for d in detections])
indices = preprocessing.non_max_suppression(
boxs, class_nums, nms_max_overlap, scores)
detections = [detections[i] for i in indices]

# Call the tracker
tracker.predict()
tracker.update(detections)

# update tracks
update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn)

"""if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)"""

p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + \
('' if dataset.mode == 'image' else f'_{frame}') # img.txt

# normalization gain whwh
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
if len(pred):

print("\n[Detections]")
if opt.detection_engine == "roboflow":
# Print results
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

trans_bboxes = pred[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

elif opt.detection_engine == "yolov4":

# Print results
# Rescale boxes from img_size to im0 size
#det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round()
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string


# Transform bboxes from tlbr to tlwh
trans_bboxes = pred[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

"""for idx, box in enumerate(bboxes):
plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx],
color=get_color_for(classes[idx]), line_thickness=opt.thickness)"""

print(s)
elif opt.detection_engine == "hub":

# Print results
# Rescale boxes from img_size to im0 size

xs = im0.shape[1]/imgsz
ys = im0.shape[0]/imgsz
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

# Transform bboxes from center-format xywh to tlwh and scale back to im0 size
trans_bboxes = pred[:, :4].clone().cpu().numpy()
trans_bboxes = [[(tb[0]*xs)-(tb[2]*xs)/2, (tb[1]*ys)-(tb[3]*ys)/2, tb[2]*xs, tb[3]*ys] for tb in trans_bboxes if tb[2] > 0 and tb[3] > 0]
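# (Above, each tb is (cx, cy, w, h) in imgsz-scale pixels: xs/ys rescale to the
# original im0 frame, subtracting half the scaled width/height moves the box
# center to its top-left corner, and boxes with non-positive w or h are dropped.)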
trans_bboxes = torch.tensor(trans_bboxes)
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

print(s)

# moved up to roboflow inference

# encode yolo detections and feed to tracker
features = encoder(im0, bboxes)
detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip(
bboxes, confs, classes, features)]

# run non-maxima suppression
boxs = np.array([d.tlwh for d in detections])
print(boxs)
scores = np.array([d.confidence for d in detections])
class_nums = np.array([d.class_num for d in detections])
indices = preprocessing.non_max_suppression(
boxs, class_nums, nms_max_overlap, scores)
detections = [detections[i] for i in indices]


# Call the tracker
tracker.predict()
tracker.update(detections)

# update tracks
update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn)

# Print time (inference + NMS)
print(f'Done. ({t2 - t1:.3f}s)')
@@ -297,7 +306,7 @@
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str,
default='yolov5s.pt', help='model.pt path(s)')
default=None, help='model.pt path(s)')
parser.add_argument('--cfg', type=str,
default='yolov4.cfg', help='yolov4 model cfg file path')
parser.add_argument('--names', type=str,
@@ -343,6 +352,12 @@ def detect(save_img=False):
help='Maximum size of the appearance descriptors gallery. If None, no budget is enforced.')
parser.add_argument('--api_key', default=None,
help='Roboflow API Key.')
parser.add_argument('--github', default="ultralytics/yolov5",
help='Torch hub github. ex: ultralytics/yolov5')
parser.add_argument('--type', default="yolov5s",
help='Torch hub model type. ex: yolov5s, custom')
parser.add_argument('--sourceType', default="github",
help='where to load the model repo from. ex: github, local')
parser.add_argument('--url', default=None,
help='Roboflow Model URL.')
parser.add_argument('--info', action='store_true',
2 changes: 1 addition & 1 deletion requirements.txt
@@ -22,4 +22,4 @@ pandas

# clip
ftfy==6.0.3
regex==2.5.86
regex
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion utils/autoanchor.py → tools/autoanchor.py
@@ -95,7 +95,7 @@ def print_results(k):
if isinstance(path, str): # *.yaml file
with open(path) as f:
data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
from utils.datasets import LoadImagesAndLabels
from utils import LoadImagesAndLabels
dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
else:
dataset = path # dataset
4 changes: 2 additions & 2 deletions utils/datasets.py → tools/datasets.py
100755 → 100644
@@ -19,8 +19,8 @@
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.general import xyxy2xywh, xywh2xyxy, clean_str
from utils.torch_utils import torch_distributed_zero_first
from tools.general import xyxy2xywh, xywh2xyxy, clean_str
from tools.torch_utils import torch_distributed_zero_first

# Parameters
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'