Skip to content

Commit

Permalink
Add yolo v8 (#71)
Browse files Browse the repository at this point in the history
  • Loading branch information
Wovchena authored Sep 18, 2023
1 parent 03a6cee commit 3623705
Show file tree
Hide file tree
Showing 19 changed files with 550 additions and 67 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test_accuracy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
run: |
source venv/bin/activate
pytest --data=./data tests/python/accuracy/test_accuracy.py
DATA=data pytest --data=./data tests/python/accuracy/test_YOLOv8.py
- name: Install CPP dependencies
run: |
sudo bash model_api/cpp/install_dependencies.sh
Expand All @@ -40,3 +41,4 @@ jobs:
- name: Run CPP Test
run: |
build/test_accuracy -d data -p tests/python/accuracy/public_scope.json
DATA=data build/test_YOLOv8
3 changes: 3 additions & 0 deletions docs/model-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ The list features only model wrappers which introduce new configuration values in
###### `YoloV4`
1. `anchors`: List - list of custom anchor values
1. `masks`: List - list of mask, applied to anchors for each output layer
###### `YOLOv5`, `YOLOv8`
1. `agnostic_nms`: bool - if True, non-maximum suppression (NMS) is class-agnostic: boxes of all classes are treated as a single class and can suppress each other; if False, NMS is applied to each class separately
1. `iou_threshold`: float - threshold for non-maximum suppression (NMS) intersection over union (IOU) filtering
###### `YOLOX`
1. `iou_threshold`: float - threshold for non-maximum suppression (NMS) intersection over union (IOU) filtering
#### `HpeAssociativeEmbedding`
Expand Down
21 changes: 21 additions & 0 deletions model_api/cpp/models/include/models/detection_model_yolo.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,24 @@ class ModelYolo : public DetectionModelExt {
std::vector<int64_t> presetMasks;
ov::Layout yoloRegionLayout = "NCHW";
};

/// Detection wrapper reimplementing the inference flow of ultralytics.YOLO.
class YOLOv5 : public DetectionModelExt {
public:
    /// Creates the wrapper from an OpenVINO model and a user configuration.
    YOLOv5(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration);
    /// Creates the wrapper on top of an already prepared inference adapter.
    YOLOv5(std::shared_ptr<InferenceAdapter>& adapter);

    /// Turns the single raw output tensor into a DetectionResult.
    std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;

    /// Name stored in / matched against the "model_type" entry of model_info.
    static std::string ModelType;

private:
    /// Validates the model input/output and embeds preprocessing when needed.
    void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
    /// Stores the wrapper-specific settings in the model's rt_info.
    void updateModelInfo() override;
    /// Reads the settings from two configuration maps, `top_priority` first.
    void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority);

    /// When true, postprocess() runs one NMS over all boxes instead of the multiclass NMS.
    bool agnostic_nms = false;
};

/// YOLOv8 wrapper. YOLOv5 and YOLOv8 are identical in terms of inference, so
/// everything except the constructors and the model type name is inherited.
class YOLOv8 : public YOLOv5 {
public:
    /// Name stored in / matched against the "model_type" entry of model_info.
    static std::string ModelType;

    /// Forwards to the YOLOv5 constructor taking a model and a configuration.
    YOLOv8(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration)
        : YOLOv5(model, configuration) {}

    /// Forwards to the YOLOv5 constructor taking an inference adapter.
    YOLOv8(std::shared_ptr<InferenceAdapter>& adapter)
        : YOLOv5(adapter) {}
};
4 changes: 4 additions & 0 deletions model_api/cpp/models/src/detection_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ std::unique_ptr<DetectionModel> DetectionModel::create_model(const std::string&
detectionModel = std::unique_ptr<DetectionModel>(new ModelYoloX(model, configuration));
} else if (model_type == ModelCenterNet::ModelType) {
detectionModel = std::unique_ptr<DetectionModel>(new ModelCenterNet(model, configuration));
} else if (model_type == YOLOv5::ModelType) {
detectionModel = std::unique_ptr<DetectionModel>(new YOLOv5(model, configuration));
} else if (model_type == YOLOv8::ModelType) {
detectionModel = std::unique_ptr<DetectionModel>(new YOLOv8(model, configuration));
} else {
throw std::runtime_error("Incorrect or unsupported model_type is provided in the model_info section: " + model_type);
}
Expand Down
2 changes: 1 addition & 1 deletion model_api/cpp/models/src/detection_model_faceboxes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ std::unique_ptr<ResultBase> ModelFaceBoxes::postprocess(InferenceResult& infResu
std::vector<Anchor> boxes = filterBoxes(boxesTensor, anchors, scores.first, variance);

// Apply Non-maximum Suppression
const std::vector<int> keep = nms(boxes, scores.second, iou_threshold);
const std::vector<size_t>& keep = nms(boxes, scores.second, iou_threshold);

// Create detection result objects
DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
Expand Down
18 changes: 10 additions & 8 deletions model_api/cpp/models/src/detection_model_ssd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,13 @@ std::unique_ptr<ResultBase> ModelSSD::postprocessSingleOutput(InferenceResult& i
0.f,
floatInputImgHeight);
desc.width = clamp(
round((detections[i * numAndStep.objectSize + 5] * netInputWidth - padLeft) * invertedScaleX - desc.x),
round((detections[i * numAndStep.objectSize + 5] * netInputWidth - padLeft) * invertedScaleX),
0.f,
floatInputImgWidth);
floatInputImgWidth) - desc.x;
desc.height = clamp(
round((detections[i * numAndStep.objectSize + 6] * netInputHeight - padTop) * invertedScaleY - desc.y),
0.f, floatInputImgHeight);
round((detections[i * numAndStep.objectSize + 6] * netInputHeight - padTop) * invertedScaleY),
0.f,
floatInputImgHeight) - desc.y;
result->objects.push_back(desc);
}
}
Expand Down Expand Up @@ -222,12 +223,13 @@ std::unique_ptr<ResultBase> ModelSSD::postprocessMultipleOutputs(InferenceResult
0.f,
floatInputImgHeight);
desc.width = clamp(
round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX - desc.x),
round((boxes[i * numAndStep.objectSize + 2] * widthScale - padLeft) * invertedScaleX),
0.f,
floatInputImgWidth);
floatInputImgWidth) - desc.x;
desc.height = clamp(
round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY - desc.y),
0.f, floatInputImgHeight);
round((boxes[i * numAndStep.objectSize + 3] * heightScale - padTop) * invertedScaleY),
0.f,
floatInputImgHeight) - desc.y;
result->objects.push_back(desc);
}
}
Expand Down
169 changes: 169 additions & 0 deletions model_api/cpp/models/src/detection_model_yolo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <openvino/openvino.hpp>

#include <utils/common.hpp>
#include <utils/nms.hpp>
#include <utils/slog.hpp>

#include "models/internal_model_data.h"
Expand Down Expand Up @@ -504,3 +505,171 @@ ModelYolo::Region::Region(size_t classes,
num = anchors.size() / 2;
}
}

std::string YOLOv5::ModelType = "YOLOv5";

// Validates the single input and single output of the model and, unless
// preprocessing is already embedded, embeds it into the model.
// Throws std::runtime_error when the input rank is not 4, the output is not
// f32, the output rank is not 3, or the number of labels does not match.
void YOLOv5::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Input ---------------------------
    const ov::Output<ov::Node>& input = model->input();
    const ov::Shape& in_shape = input.get_partial_shape().get_max_shape();
    if (in_shape.size() != 4) {
        throw std::runtime_error("YOLO: the rank of the input must be 4");
    }
    inputNames.push_back(input.get_any_name());
    const ov::Layout& inputLayout = getInputLayout(input);

    if (!embedded_processing) {
        // Spatial size of the network input, looked up through the layout.
        const size_t width = in_shape[ov::layout::width_idx(inputLayout)];
        const size_t height = in_shape[ov::layout::height_idx(inputLayout)];

        model = ImageModel::embedProcessing(model,
                                            inputNames[0],
                                            inputLayout,
                                            resizeMode,
                                            interpolationMode,
                                            ov::Shape{width, height},
                                            pad_value,
                                            reverse_input_channels,
                                            {},
                                            scale_values);

        netInputWidth = width;
        netInputHeight = height;

        embedded_processing = true;
    }

    // --------------------------- Output --------------------------
    const ov::Output<const ov::Node>& output = model->output();
    if (ov::element::Type_t::f32 != output.get_element_type()) {
        throw std::runtime_error("YOLO: the output must be of precision f32");
    }
    const ov::Shape& out_shape = output.get_partial_shape().get_max_shape();
    if (3 != out_shape.size()) {
        throw std::runtime_error("YOLO: the output must be of rank 3");
    }
    // out_shape[1] is expected to be 4 box values followed by one score per label.
    const bool labels_mismatch = !labels.empty() && labels.size() + 4 != out_shape[1];
    if (labels_mismatch) {
        throw std::runtime_error("YOLO: number of labels must be smaller than out_shape[1] by 4");
    }
}

void YOLOv5::updateModelInfo() {
DetectionModelExt::updateModelInfo();
model->set_rt_info(YOLOv5::ModelType, "model_info", "model_type");
model->set_rt_info(agnostic_nms, "model_info", "agnostic_nms");
model->set_rt_info(iou_threshold, "model_info", "iou_threshold");
}

// Initializes the wrapper settings. Each value is looked up via
// get_from_any_maps() in `top_priority` and `mid_priority`; the last argument
// of every call is the default used by this wrapper.
void YOLOv5::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) {
    pad_value = get_from_any_maps("pad_value", top_priority, mid_priority, 114);

    // Letterbox resize with linear interpolation is applied only when neither
    // map specifies "resize_type".
    const bool resize_in_top = top_priority.end() != top_priority.find("resize_type");
    const bool resize_in_mid = mid_priority.end() != mid_priority.find("resize_type");
    if (!(resize_in_top || resize_in_mid)) {
        interpolationMode = cv::INTER_LINEAR;
        resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
    }

    reverse_input_channels = get_from_any_maps("reverse_input_channels", top_priority, mid_priority, true);
    scale_values = get_from_any_maps("scale_values", top_priority, mid_priority, std::vector<float>{255.0f});
    confidence_threshold = get_from_any_maps("confidence_threshold", top_priority, mid_priority, 0.25f);
    agnostic_nms = get_from_any_maps("agnostic_nms", top_priority, mid_priority, agnostic_nms);
    iou_threshold = get_from_any_maps("iou_threshold", top_priority, mid_priority, 0.7f);
}

// Creates the wrapper from an OpenVINO model.
// `configuration` has the highest priority; values stored in the model's
// "model_info" rt_info section are used next. A model without a "model_info"
// section is accepted: the section is treated as empty so that the defaults
// from init_from_config() apply instead of failing on the missing section.
YOLOv5::YOLOv5(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration)
    : DetectionModelExt(model, configuration) {
    const ov::AnyMap model_info = model->has_rt_info("model_info")
        ? model->get_rt_info<ov::AnyMap>("model_info")
        : ov::AnyMap{};
    init_from_config(configuration, model_info);
}

// Creates the wrapper on top of an inference adapter. The adapter's model
// configuration is the only source of settings; the lower-priority map is empty.
YOLOv5::YOLOv5(std::shared_ptr<InferenceAdapter>& adapter)
    : DetectionModelExt(adapter) {
    const ov::AnyMap no_mid_priority{};
    init_from_config(adapter->getModelConfig(), no_mid_priority);
}

// Converts the raw output tensor into a DetectionResult.
//
// The output is read as [1, rows, num_proposals]: rows 0..3 of every proposal
// are the box center x, center y, width and height (in network input
// coordinates), the remaining rows are per-class scores.
// Steps: pick the best class per proposal, drop proposals whose best score is
// not above confidence_threshold, run NMS (class-agnostic or multiclass,
// depending on agnostic_nms), then map the kept boxes back to the original
// image, undoing the resize and the letterbox padding.
//
// Throws std::runtime_error when there is not exactly one output, the output
// rank is not 3, the first dim is not 1, or there are fewer than 4 rows.
std::unique_ptr<ResultBase> YOLOv5::postprocess(InferenceResult& infResult) {
    if (1 != infResult.outputsData.size()) {
        throw std::runtime_error("YOLO: expect 1 output");
    }
    const ov::Tensor& detectionsTensor = infResult.getFirstOutputTensor();
    const ov::Shape& out_shape = detectionsTensor.get_shape();
    if (3 != out_shape.size()) {
        throw std::runtime_error("YOLO: the output must be of rank 3");
    }
    if (1 != out_shape[0]) {
        throw std::runtime_error("YOLO: the first dim of the output must be 1");
    }
    // Index of the first class-score row; rows before it hold the box.
    constexpr size_t LABELS_START = 4;
    if (out_shape[1] < LABELS_START) {
        // Without this check the box rows below would be read out of bounds.
        throw std::runtime_error("YOLO: the second dim of the output must be at least 4");
    }
    const size_t num_rows = out_shape[1];
    const size_t num_proposals = out_shape[2];

    std::vector<Anchor> boxes;
    std::vector<float> confidences;
    std::vector<size_t> labelIDs;
    const float* const detections = detectionsTensor.data<float>();
    for (size_t i = 0; i < num_proposals; ++i) {
        float confidence = 0.0f;
        // Start at the first class row so that `max_id - LABELS_START` can
        // never underflow, even when no class score is positive.
        size_t max_id = LABELS_START;
        for (size_t j = LABELS_START; j < num_rows; ++j) {
            const float score = detections[j * num_proposals + i];
            if (score > confidence) {
                confidence = score;
                max_id = j;
            }
        }
        if (confidence > confidence_threshold) {
            const float center_x = detections[0 * num_proposals + i];
            const float center_y = detections[1 * num_proposals + i];
            const float box_width = detections[2 * num_proposals + i];
            const float box_height = detections[3 * num_proposals + i];
            // Convert center/size to left, top, right, bottom.
            boxes.push_back(Anchor{
                center_x - box_width / 2.0f,
                center_y - box_height / 2.0f,
                center_x + box_width / 2.0f,
                center_y + box_height / 2.0f,
            });
            confidences.push_back(confidence);
            labelIDs.push_back(max_id - LABELS_START);
        }
    }

    constexpr bool includeBoundaries = false;
    constexpr size_t keep_top_k = 30000;
    std::vector<size_t> keep;
    if (agnostic_nms) {
        keep = nms(boxes, confidences, iou_threshold, includeBoundaries, keep_top_k);
    } else {
        std::vector<AnchorLabeled> boxes_with_class;
        boxes_with_class.reserve(boxes.size());
        for (size_t i = 0; i < boxes.size(); ++i) {
            boxes_with_class.emplace_back(boxes[i], static_cast<int>(labelIDs[i]));
        }
        keep = multiclass_nms(boxes_with_class, confidences, iou_threshold, includeBoundaries, keep_top_k);
    }

    // The result is owned by `base` right away; `result` is only a typed view
    // used to fill in the detections.
    DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData);
    auto base = std::unique_ptr<ResultBase>(result);

    const auto& internalData = infResult.internalModelData->asRef<InternalImageModelData>();
    float floatInputImgWidth = static_cast<float>(internalData.inputImgWidth),
          floatInputImgHeight = static_cast<float>(internalData.inputImgHeight);
    float invertedScaleX = floatInputImgWidth / netInputWidth,
          invertedScaleY = floatInputImgHeight / netInputHeight;
    int padLeft = 0, padTop = 0;
    if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
        // Aspect-preserving resize uses one scale for both axes.
        invertedScaleX = invertedScaleY = std::max(invertedScaleX, invertedScaleY);
        if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
            // Letterbox centers the image, so half of the unused space is padding.
            padLeft = (netInputWidth - static_cast<int>(std::round(floatInputImgWidth / invertedScaleX))) / 2;
            padTop = (netInputHeight - static_cast<int>(std::round(floatInputImgHeight / invertedScaleY))) / 2;
        }
    }

    for (size_t idx : keep) {
        DetectedObject desc;
        desc.x = clamp(
            round((boxes[idx].left - padLeft) * invertedScaleX),
            0.f,
            floatInputImgWidth);
        desc.y = clamp(
            round((boxes[idx].top - padTop) * invertedScaleY),
            0.f,
            floatInputImgHeight);
        // Clamp the far corner first, then derive the size from the clamped corners.
        desc.width = clamp(
            round((boxes[idx].right - padLeft) * invertedScaleX),
            0.f,
            floatInputImgWidth) - desc.x;
        desc.height = clamp(
            round((boxes[idx].bottom - padTop) * invertedScaleY),
            0.f,
            floatInputImgHeight) - desc.y;
        desc.confidence = confidences[idx];
        desc.labelID = static_cast<size_t>(labelIDs[idx]);
        desc.label = getLabelName(desc.labelID);
        result->objects.push_back(desc);
    }
    return base;
}

std::string YOLOv8::ModelType = "YOLOv8";
4 changes: 2 additions & 2 deletions model_api/cpp/models/src/detection_model_yolox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ std::unique_ptr<ResultBase> ModelYoloX::postprocess(InferenceResult& infResult)
}

// NMS for valid boxes
std::vector<int> keep = nms(validBoxes, scores, iou_threshold, true);
for (auto& index: keep) {
const std::vector<size_t>& keep = nms(validBoxes, scores, iou_threshold, true);
for (size_t index: keep) {
// Create new detected box
DetectedObject obj;
obj.x = clamp(validBoxes[index].left, 0.f, static_cast<float>(scale.inputImgWidth));
Expand Down
29 changes: 14 additions & 15 deletions model_api/cpp/utils/include/utils/nms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ struct AnchorLabeled : public Anchor {
AnchorLabeled() = default;
AnchorLabeled(float _left, float _top, float _right, float _bottom, int _labelID) :
Anchor(_left, _top, _right, _bottom), labelID(_labelID) {}
AnchorLabeled(const Anchor& coords, int labelID) : Anchor{coords}, labelID{labelID} {}
};

template <typename Anchor>
std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>& scores,
const float thresh, bool includeBoundaries=false, size_t maxNum=0) {
if (maxNum == 0) {
maxNum = boxes.size();
std::vector<size_t> nms(const std::vector<Anchor>& boxes, const std::vector<float>& scores, const float thresh, bool includeBoundaries=false, size_t keep_top_k=0) {
if (keep_top_k == 0) {
keep_top_k = boxes.size();
}
std::vector<float> areas(boxes.size());
for (size_t i = 0; i < boxes.size(); ++i) {
Expand All @@ -67,25 +67,24 @@ std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>&
std::sort(order.begin(), order.end(), [&scores](int o1, int o2) { return scores[o1] > scores[o2]; });

size_t ordersNum = 0;
for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0 && ordersNum < maxNum; ordersNum++);
for (; ordersNum < order.size() && scores[order[ordersNum]] >= 0 && ordersNum < keep_top_k; ordersNum++);

std::vector<int> keep;
std::vector<size_t> keep;
bool shouldContinue = true;
for (size_t i = 0; shouldContinue && i < ordersNum; ++i) {
auto idx1 = order[i];
int idx1 = order[i];
if (idx1 >= 0) {
keep.push_back(idx1);
shouldContinue = false;
for (size_t j = i + 1; j < ordersNum; ++j) {
auto idx2 = order[j];
int idx2 = order[j];
if (idx2 >= 0) {
shouldContinue = true;
auto overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - std::fmaxf(boxes[idx1].left, boxes[idx2].left);
auto overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - std::fmaxf(boxes[idx1].top, boxes[idx2].top);
auto intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
auto overlap = intersection / (areas[idx1] + areas[idx2] - intersection);

if (overlap >= thresh) {
float overlappingWidth = std::fminf(boxes[idx1].right, boxes[idx2].right) - std::fmaxf(boxes[idx1].left, boxes[idx2].left);
float overlappingHeight = std::fminf(boxes[idx1].bottom, boxes[idx2].bottom) - std::fmaxf(boxes[idx1].top, boxes[idx2].top);
float intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
float union_area = areas[idx1] + areas[idx2] - intersection;
if (0.0f == union_area || intersection / union_area > thresh) {
order[j] = -1;
}
}
Expand All @@ -95,5 +94,5 @@ std::vector<int> nms(const std::vector<Anchor>& boxes, const std::vector<float>&
return keep;
}

std::vector<int> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
std::vector<size_t> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
const float iou_threshold=0.45f, bool includeBoundaries=false, size_t maxNum=200);
2 changes: 1 addition & 1 deletion model_api/cpp/utils/src/nms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include "utils/nms.hpp"


std::vector<int> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
std::vector<size_t> multiclass_nms(const std::vector<AnchorLabeled>& boxes, const std::vector<float>& scores,
const float iou_threshold, bool includeBoundaries, size_t maxNum) {
std::vector<Anchor> boxes_copy;
boxes_copy.reserve(boxes.size());
Expand Down
4 changes: 3 additions & 1 deletion model_api/python/openvino/model_api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
add_rotated_rects,
get_contours,
)
from .yolo import YOLO, YOLOF, YOLOX, YoloV3ONNX, YoloV4
from .yolo import YOLO, YOLOF, YOLOX, YoloV3ONNX, YoloV4, YOLOv5, YOLOv8

classification_models = [
"resnet-18-pytorch",
Expand Down Expand Up @@ -118,6 +118,8 @@
"YOLO",
"YoloV3ONNX",
"YoloV4",
"YOLOv5",
"YOLOv8",
"YOLOF",
"YOLOX",
"ClassificationResult",
Expand Down
Loading

0 comments on commit 3623705

Please sign in to comment.