Skip to content

Commit

Permalink
[GPU] Few shape infer fixes
Browse files — browse the repository at this point in the history
  • Loading branch information
vladimir-paramuzov committed May 15, 2024
1 parent e3934a8 commit f1417c1
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 43 deletions.
4 changes: 2 additions & 2 deletions src/core/shape_inference/include/nms_shape_inference.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ namespace v13 {
template <class T, class TRShape = result_shape_t<T>>
std::vector<TRShape> shape_infer(const NMSRotated* op,
const std::vector<T>& input_shapes,
const ITensorAccessor& ta = make_tensor_accessor()) {
constexpr bool static_output = !std::is_same<T, PartialShape>::value;
const ITensorAccessor& ta = make_tensor_accessor(),
const bool static_output = !std::is_same<T, PartialShape>::value) {
return nms::shape_infer(op, input_shapes, ta, static_output);
}
} // namespace v13
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ struct TensorsContainer final {
} else if (m_tensors.count(port) > 0) {
return m_tensors.at(port);
} else {
OPENVINO_THROW("[GPU] Can't get tensor for ", port, " port!\n");
return ov::Tensor{};
}
}

Expand Down
11 changes: 9 additions & 2 deletions src/plugins/intel_gpu/src/graph/dft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,15 @@ std::vector<layout> dft_inst::calc_output_layouts(dft_node const& /*node*/, kern
std::vector<layout> layouts;

const auto primitive = impl_param.typed_desc<dft>();
const auto input0_layout = impl_param.get_input_layout(0);
const auto input1_layout = impl_param.get_input_layout(1);
const auto& input0_layout = impl_param.get_input_layout(0);
if (impl_param.input_layouts.size() == 1) {
auto dt = primitive->get_output_data_type(0).value_or(input0_layout.data_type);
format output_format = format::adjust_to_rank(input0_layout.format, primitive->output_shape.size());
layouts.push_back(layout{primitive->output_shape, dt, output_format});
return layouts;
}

const auto& input1_layout = impl_param.get_input_layout(1);

std::vector<ShapeType> input_shapes = {
input0_layout.get<ShapeType>(),
Expand Down
7 changes: 7 additions & 0 deletions src/plugins/intel_gpu/src/graph/eltwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,13 @@ std::vector<layout> eltwise_inst::calc_output_layouts(eltwise_node const& /*node
for (size_t i = 0; i < desc->input_size(); i++) {
input_shapes.push_back(impl_param.get_input_layout(i).get<ShapeType>());
}

if (desc->mode == eltwise_mode::is_finite || desc->mode == eltwise_mode::is_nan || desc->mode == eltwise_mode::is_inf) {
output_shapes = input_shapes;
} else {
output_shapes = ov::op::eltwise_shape_infer(&op, input_shapes);
}

output_shapes = ov::op::eltwise_shape_infer(&op, input_shapes);

if (input_layout.format == format::b_fs_zyx_fsv16) // use optimized 5D
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ std::vector<layout> loop_inst::calc_output_layouts(loop_node const& /*node*/, ke
if (impl_param.inner_nets.empty()) {
OPENVINO_ASSERT(impl_param.inner_progs.size() == 1, "Loop(", prim->id, ") should have only one inner network");
const auto& body_outputs = impl_param.inner_progs.front()->get_outputs();
output_layouts = get_output_layouts<program_node*>(impl_param, body_outputs);
output_layouts = get_output_layouts<program_node*>(impl_param, body_outputs, prim->max_num_iterations);
} else {
auto& memory_deps = impl_param.memory_deps;
const size_t current_iteration_idx = 0;
Expand Down
46 changes: 22 additions & 24 deletions src/plugins/intel_gpu/src/graph/non_max_suppression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include "json_object.h"
#include <string>

#include "intel_gpu/runtime/tensor_accessor.hpp"
#include "openvino/op/nms_rotated.hpp"
#include "nms_shape_inference.hpp"

namespace cldnn {
Expand All @@ -26,37 +28,33 @@ std::vector<layout> non_max_suppression_inst::calc_output_layouts(non_max_suppre

auto desc = impl_param.typed_desc<non_max_suppression>();

ov::op::v9::NonMaxSuppression op;
op.set_box_encoding(desc->center_point_box ? ov::op::v9::NonMaxSuppression::BoxEncodingType::CENTER
: ov::op::v9::NonMaxSuppression::BoxEncodingType::CORNER);
op.set_sort_result_descending(desc->sort_result_descending);

TensorsContainer const_data(&impl_param.get_stream(), impl_param.memory_deps);
std::vector<ShapeType> output_shapes = { ShapeType{}, ShapeType{}, ShapeType{} };
std::vector<ShapeType> input_shapes = {
impl_param.get_input_layout(0).get<ShapeType>(),
impl_param.get_input_layout(1).get<ShapeType>()
};

auto& memory_deps = impl_param.memory_deps;
std::unordered_map<size_t, ov::Tensor> const_data;
if (memory_deps.count(2)) {
auto max_output_boxes_per_class_mem = memory_deps.at(2);
cldnn::mem_lock<uint8_t, mem_lock_type::read> max_output_boxes_per_class_lock(max_output_boxes_per_class_mem,
impl_param.get_stream());
auto max_output_boxes_per_class_tensor = make_tensor(max_output_boxes_per_class_mem->get_layout(),
max_output_boxes_per_class_lock.data());
const_data.emplace(2, max_output_boxes_per_class_tensor);

const auto& boxes = input_shapes[0];
const auto& scores = input_shapes[1];
// To produce a static output, we need to check dynamism of input tensor's dimensions
// Output tensor has the following shape: [min(num_boxes, max_output_boxes_per_class) * num_batches * num_classes, 3]
// The first dimension is an upper bound for the number of possible selected boxes
bool static_output = boxes[1].is_static() && scores[0].is_static() && scores[1].is_static();
output_shapes = ov::op::v9::shape_infer(&op, input_shapes, ov::make_tensor_accessor(const_data), static_output);
const auto& boxes = input_shapes[0];
const auto& scores = input_shapes[1];
// To produce a static output, we need to check dynamism of input tensor's dimensions
// Output tensor has the following shape: [min(num_boxes, max_output_boxes_per_class) * num_batches * num_classes, 3]
// The first dimension is an upper bound for the number of possible selected boxes
bool static_output = boxes[1].is_static() && scores[0].is_static() && scores[1].is_static();

if (desc->rotation != non_max_suppression::Rotation::NONE) {
ov::op::v13::NMSRotated op;
op.set_clockwise(desc->rotation == non_max_suppression::Rotation::CLOCKWISE);
op.set_sort_result_descending(desc->sort_result_descending);

output_shapes = ov::op::v13::shape_infer(&op, input_shapes, cldnn::make_tensor_accessor(const_data), static_output);
} else {
output_shapes[0] = output_shapes[1] = ShapeType{ov::Dimension::dynamic(), 3};
output_shapes[2] = ShapeType{1};
ov::op::v9::NonMaxSuppression op;
op.set_box_encoding(desc->center_point_box ? ov::op::v9::NonMaxSuppression::BoxEncodingType::CENTER
: ov::op::v9::NonMaxSuppression::BoxEncodingType::CORNER);
op.set_sort_result_descending(desc->sort_result_descending);

output_shapes = ov::op::v9::shape_infer(&op, input_shapes, cldnn::make_tensor_accessor(const_data), static_output);
}

for (size_t i = 0; i < desc->num_outputs; ++i) {
Expand Down
18 changes: 16 additions & 2 deletions src/plugins/intel_gpu/src/graph/pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,18 @@ std::vector<layout> pooling_inst::calc_output_layouts(pooling_node const& /*node
output_shape[0] = input_shape[0];
output_shape[1] = input_shape[1];

std::vector<layout> out_layouts = {
layout{output_shape, output_dtype, input_layout.format}
};

if (desc->num_outputs == 2) {
auto l = out_layouts[0];
l.data_type = desc->index_element_type;
out_layouts.push_back(l);
}

if (input_shape.is_dynamic()) {
return { layout{output_shape, input_layout.data_type, input_layout.format} };
return out_layouts;
}

if (desc->with_output_size) {
Expand Down Expand Up @@ -236,7 +246,11 @@ std::vector<layout> pooling_inst::calc_output_layouts(pooling_node const& /*node
output_shape[i + 2] = out_dim;
}

return { layout{output_shape, output_dtype, input_layout.format} };
for (auto& ol : out_layouts) {
ol.set_partial_shape(output_shape);
}

return out_layouts;
}

template std::vector<layout> pooling_inst::calc_output_layouts<ov::PartialShape>(pooling_node const& node, const kernel_impl_params& impl_param);
Expand Down
46 changes: 35 additions & 11 deletions src/plugins/intel_gpu/src/graph/roi_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "roi_pooling_inst.h"
#include "roi_pooling_shape_inference.hpp"
#include "psroi_pooling_shape_inference.hpp"

#include "primitive_type_base.h"
#include "json_object.h"
Expand All @@ -28,23 +29,46 @@ layout roi_pooling_inst::calc_output_layout(roi_pooling_node const& node, kernel

template<typename ShapeType>
std::vector<layout> roi_pooling_inst::calc_output_layouts(roi_pooling_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<roi_pooling>();
auto desc = impl_param.typed_desc<roi_pooling>();
auto input0_layout = impl_param.get_input_layout(0);
auto output_type = desc->output_data_types[0].value_or(input0_layout.data_type);
auto data_shape = input0_layout.get<ShapeType>();
auto output_format = input0_layout.format;

ov::op::v0::ROIPooling op;
std::vector<int> output_size { desc->pooled_height, desc->pooled_width };
op.set_output_roi({ output_size.begin(), output_size.end() });
op.set_spatial_scale(desc->spatial_scale);

std::vector<ShapeType> output_shapes;
ShapeType rois_shape = impl_param.get_input_layout(1).get<ShapeType>();
std::vector<ShapeType> input_shapes = {
data_shape,
rois_shape
};
std::vector<ShapeType> output_shapes = ov::op::v0::shape_infer(&op, input_shapes);
if (desc->mode == cldnn::pooling_mode::deformable_bilinear) {
auto group_size = desc->group_size;
auto out_dim = desc->output_dim;
auto num_rois = rois_shape[0];

output_shapes = { ov::PartialShape{num_rois, out_dim, group_size, group_size} };
} else if (desc->position_sensitive) {
ov::op::v0::PSROIPooling op;
op.set_spatial_scale(desc->spatial_scale);
op.set_output_dim(desc->output_dim);
op.set_group_size(desc->pooled_width);
op.set_spatial_bins_x(desc->spatial_bins_x);
op.set_spatial_bins_y(desc->spatial_bins_y);
op.set_mode("average"); // mode doesn't matter

std::vector<ShapeType> input_shapes = {
data_shape,
rois_shape
};
output_shapes = ov::op::v0::shape_infer(&op, input_shapes);
} else {
ov::op::v0::ROIPooling op;
std::vector<int> output_size { desc->pooled_height, desc->pooled_width };
op.set_output_roi({ output_size.begin(), output_size.end() });
op.set_spatial_scale(desc->spatial_scale);

std::vector<ShapeType> input_shapes = {
data_shape,
rois_shape
};
output_shapes = ov::op::v0::shape_infer(&op, input_shapes);
}

return { layout{output_shapes[0], output_type, output_format} };
}
Expand Down

0 comments on commit f1417c1

Please sign in to comment.