Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New YOLOv5 support #19

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 10 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,6 @@ To use the Roboflow Inference API as your detection engine:
Upload, annotate, and train your model on Roboflow with [Roboflow Train](https://docs.roboflow.com/train).
Your model will be hosted on an inference URL.

To use YOLOv5 as your detection engine:

Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/)

The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ)

The YOLOv5 implementation is currently compatible with this commit hash of YOLOv5 `886f1c03d839575afecb059accf74296fad395b6`

## Performing Object Tracking

### Clone repositories
Expand Down Expand Up @@ -77,7 +69,17 @@ python clip_object_tracker.py --source data/video/fish.mp4 --url https://detect.

**NOTE:** you must provide a valid API key from [Roboflow](https://docs.roboflow.com).

### Run with Torch Hub YOLOv5
This method lets you use the latest YOLOv5 version available on [Torch Hub](https://pytorch.org/hub/ultralytics_yolov5/). The `--github` flag sets the Torch Hub repository to load from, which is `ultralytics/yolov5` by default. The `--type` flag sets the model type, such as `yolov5s`, `yolov5l`, or `custom`; the default is `yolov5s`. The `--sourceType` flag determines where the weights are loaded from, either `local` or `github`. The default is `github`; with `local`, you have to provide pretrained weights through `--weights`.
```bash

python clip_object_tracker.py --weights models/yolov5s.pt --source "data/video/cars.mp4" --detection-engine hub --github ultralytics/yolov5 --type yolov5s --sourceType github --info
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testing custom models and posting ex here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


```

### Run with YOLOv5
Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/).
The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ). It is currently compatible with the following YOLOv5 commit hash: `886f1c03d839575afecb059accf74296fad395b6`.
```bash

python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/fish.mp4 --detection-engine yolov5 --info
Expand Down
223 changes: 130 additions & 93 deletions clip_object_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,34 @@
import time
from pathlib import Path

import clip



import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import numpy as np

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import xyxy2xywh, xywh2xyxy, \
from yolov5.utils.datasets import LoadStreams, LoadImages
from yolov5.utils.general import xyxy2xywh, xywh2xyxy, \
strip_optimizer, set_logging, increment_path, scale_coords
from utils.plots import plot_one_box
from utils.torch_utils import select_device, time_synchronized
from utils.roboflow import predict_image
from yolov5.utils.plots import plot_one_box
from yolov5.utils.torch_utils import select_device, time_synchronized
from yolov5.utils.roboflow import predict_image

# deep sort imports
from deep_sort import preprocessing, nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_clip_detections as gdet

from utils.yolov5 import Yolov5Engine
from utils.yolov4 import Yolov4Engine
import clip

from yolov5.utils.yolov5 import Yolov5Engine
from yolov5.utils.yolov4 import Yolov4Engine
from yolov5.utils.hub import TorchHubEngine


classes = []

Expand All @@ -43,6 +47,7 @@ def update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img,
class_num = track.class_num
bbox = xyxy
class_name = names[int(class_num)] if opt.detection_engine == "yolov5" else class_num
print("track bbox",bbox)
if opt.info:
print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format(
str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
Expand Down Expand Up @@ -98,19 +103,21 @@ def detect(save_img=False):
# calculate cosine distance metric
metric = nn_matching.NearestNeighborDistanceMetric(
"cosine", max_cosine_distance, nn_budget)


source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
# load yolov5 model here
if opt.detection_engine == "yolov5":
yolov5_engine = Yolov5Engine(opt.weights, device, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half)
global names
names = yolov5_engine.get_names()
elif opt.detection_engine == "yolov4":
yolov4_engine = Yolov4Engine(opt.weights, opt.cfg, device, opt.names, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half)

if opt.detection_engine == "hub":
hub_engine = TorchHubEngine(opt.github, opt.type, opt.weights, opt.sourceType, opt.classes, opt.confidence, opt.overlap, opt.augment,imgsz)
# initialize tracker
tracker = Tracker(metric)

source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://'))

Expand Down Expand Up @@ -158,105 +165,129 @@ def detect(save_img=False):
elif opt.detection_engine == "yolov5":
print("yolov5 inference")
pred = yolov5_engine.infer(img)
else:
elif opt.detection_engine == "yolov4":
print("yolov4 inference {}".format(im0.shape))
pred = yolov4_engine.infer(im0)
pred, classes = yolov4_engine.postprocess(pred, im0.shape)
pred = [torch.tensor(pred)]
elif opt.detection_engine == "hub":
print("torch hub inference")
pred, classes = hub_engine.infer(im0)

t2 = time_synchronized()
# Process detections
for i, det in enumerate(pred): # detections per image
#moved up to roboflow inference
"""if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)"""

p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + \
('' if dataset.mode == 'image' else f'_{frame}') # img.txt

# normalization gain whwh
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
if len(det):

print("\n[Detections]")
if opt.detection_engine == "roboflow":
# Print results
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

trans_bboxes = det[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]

elif opt.detection_engine == "yolov4":

# Print results
# Rescale boxes from img_size to im0 size
#det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round()
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string


# Transform bboxes from tlbr to tlwh
trans_bboxes = det[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]

"""for idx, box in enumerate(bboxes):
plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx],
color=get_color_for(classes[idx]), line_thickness=opt.thickness)"""

print(s)
else:
"""if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)"""

p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + \
('' if dataset.mode == 'image' else f'_{frame}') # img.txt

# normalization gain whwh
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
if len(pred):

print("\n[Detections]")
if opt.detection_engine == "roboflow":
# Print results
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

trans_bboxes = pred[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

elif opt.detection_engine == "yolov4":

# Print results
# Rescale boxes from img_size to im0 size
#det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round()
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string


# Transform bboxes from tlbr to tlwh
trans_bboxes = pred[:, :4].clone()
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

"""for idx, box in enumerate(bboxes):
plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx],
color=get_color_for(classes[idx]), line_thickness=opt.thickness)"""

print(s)
elif opt.detection_engine == "hub":

# Print results
# Rescale boxes from img_size to im0 size

xs = im0.shape[1]/imgsz
ys = im0.shape[0]/imgsz
clss = np.array(classes)
for c in np.unique(clss):
n = (clss == c).sum() # detections per class
s += f'{n} {c}, ' # add to string

# Transform bboxes from bltr to tlwh and perform scale back to im0 size
trans_bboxes = pred[:, :4].clone().numpy()
trans_bboxes = [[(tb[0]*xs)-(tb[2]*xs)/2, (tb[1]*ys)-(tb[3]*ys)/2, tb[2]*xs, tb[3]*ys] for tb in trans_bboxes if tb[2] > 0 and tb[3] > 0]
trans_bboxes = torch.tensor(trans_bboxes)
bboxes = trans_bboxes[:, :4].cpu()
confs = pred[:, 4]

print(s)
else:

# Print results
# Rescale boxes from img_size to im0 size
# Print results
# Rescale boxes from img_size to im0 size

det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f'{n} {names[int(c)]}s, ' # add to string
pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], im0.shape).round()
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f'{n} {names[int(c)]}s, ' # add to string

# Transform bboxes from tlbr to tlwh
trans_bboxes = det[:, :4].clone()
trans_bboxes[:, 2:] -= trans_bboxes[:, :2]
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]
class_nums = det[:, -1]
classes = class_nums
# Transform bboxes from tlbr to tlwh
trans_bboxes = det[:, :4].clone()
trans_bboxes[:, 2:] -= trans_bboxes[:, :2]
bboxes = trans_bboxes[:, :4].cpu()
confs = det[:, 4]
class_nums = det[:, -1]
classes = class_nums

print(s)
print(s)

# moved up to roboflow inference

# encode yolo detections and feed to tracker
features = encoder(im0, bboxes)
detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip(
bboxes, confs, classes, features)]

# encode yolo detections and feed to tracker
features = encoder(im0, bboxes)
detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip(
bboxes, confs, classes, features)]
# run non-maxima supression
boxs = np.array([d.tlwh for d in detections])
print(boxs)
scores = np.array([d.confidence for d in detections])
class_nums = np.array([d.class_num for d in detections])
indices = preprocessing.non_max_suppression(
boxs, class_nums, nms_max_overlap, scores)
detections = [detections[i] for i in indices]

# run non-maxima supression
boxs = np.array([d.tlwh for d in detections])
scores = np.array([d.confidence for d in detections])
class_nums = np.array([d.class_num for d in detections])
indices = preprocessing.non_max_suppression(
boxs, class_nums, nms_max_overlap, scores)
detections = [detections[i] for i in indices]

# Call the tracker
tracker.predict()
tracker.update(detections)
# Call the tracker
tracker.predict()
tracker.update(detections)

# update tracks
update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn)
# update tracks
update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn)

# Print time (inference + NMS)
print(f'Done. ({t2 - t1:.3f}s)')
Expand Down Expand Up @@ -343,6 +374,12 @@ def detect(save_img=False):
help='Maximum size of the appearance descriptors allery. If None, no budget is enforced.')
parser.add_argument('--api_key', default=None,
help='Roboflow API Key.')
parser.add_argument('--github', default="ultralytics/yolov5",
help='Torch hub github. ex: ultralytics/yolov5')
parser.add_argument('--type', default="yolov5s",
help='Torch hub model type. ex: yolov5s, custom')
parser.add_argument('--sourceType', default="github",
help='where to load the modle from. ex: github, local')
parser.add_argument('--url', default=None,
help='Roboflow Model URL.')
parser.add_argument('--info', action='store_true',
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ pandas

# clip
ftfy==6.0.3
regex==2.5.86
regex
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions models/common.py → yolov5/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import torch.nn as nn
from PIL import Image, ImageDraw

from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
from utils.plots import color_list
from yolov5.utils.datasets import letterbox
from yolov5.utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
from yolov5.utils.plots import color_list


def autopad(k, p=None): # kernel, padding
Expand Down
4 changes: 2 additions & 2 deletions models/experimental.py → yolov5/models/experimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import torch
import torch.nn as nn

from models.common import Conv, DWConv
from utils.google_utils import attempt_download
from yolov5.models.common import Conv, DWConv
from yolov5.utils.google_utils import attempt_download


class CrossConv(nn.Module):
Expand Down
4 changes: 2 additions & 2 deletions models/export.py → yolov5/models/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging, check_img_size
from yolov5.utils.activations import Hardswish, SiLU
from yolov5.utils.general import set_logging, check_img_size

if __name__ == '__main__':
parser = argparse.ArgumentParser()
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions utils/models/yolo.py → yolov5/models/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@

from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, C3, Concat, NMS, autoShape
from models.experimental import MixConv2d, CrossConv
from utils.autoanchor import check_anchor_order
from utils.general import make_divisible, check_file, set_logging
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
from yolov5.utils.autoanchor import check_anchor_order
from yolov5.utils.general import make_divisible, check_file, set_logging
from yolov5.utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
select_device, copy_attr

try:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion utils/autoanchor.py → yolov5/utils/autoanchor.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def print_results(k):
if isinstance(path, str): # *.yaml file
with open(path) as f:
data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
from utils.datasets import LoadImagesAndLabels
from yolov5.utils.datasets import LoadImagesAndLabels
dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
else:
dataset = path # dataset
Expand Down