[CPU][ARM] Weights compression f32->f16 is moved to CPU Plug-in side
antonvor committed Nov 15, 2023
1 parent 7cb3bf5 commit c8cee60
Showing 11 changed files with 83 additions and 10 deletions.
2 changes: 1 addition & 1 deletion samples/cpp/benchmark_app/main.cpp
@@ -233,7 +233,7 @@ void fuse_mean_scale(ov::preprocess::PrePostProcessor& preproc, const benchmark_
* @brief The entry point of the benchmark application
*/
int main(int argc, char* argv[]) {
std::shared_ptr<StatisticsReport> statistics;
try {
ov::CompiledModel compiledModel;

@@ -14,6 +14,7 @@ namespace pass {
class TRANSFORMATIONS_API EnableDecompressionConvertConstantFolding;
class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding;
class TRANSFORMATIONS_API KeepConstAndDecompression;
class TRANSFORMATIONS_API KeepConstFP32Unfolded;
class TRANSFORMATIONS_API KeepConstantsPrecisionAndAddConverts;

} // namespace pass
@@ -49,6 +50,12 @@ class ov::pass::KeepConstAndDecompression : public MatcherPass {
KeepConstAndDecompression();
};

/**
 * @ingroup ie_transformation_common_api
 * @brief Disables ConstantFolding and f32->f16 compression for f32 MatMul weight Constants,
 * so the weight compression can be performed later by the plugin
 */
class ov::pass::KeepConstFP32Unfolded : public MatcherPass {
public:
OPENVINO_RTTI("KeepConstFP32Unfolded", "0");
KeepConstFP32Unfolded();
};

/**
* @ingroup ie_transformation_common_api
* @brief Prevents Consts precision conversion and adds Convert with disabled ConstantFolding
@@ -23,6 +23,12 @@ TRANSFORMATIONS_API void unmark_as_decompression(const std::shared_ptr<Node>& no

TRANSFORMATIONS_API bool is_decompression(const std::shared_ptr<Node>& node);

TRANSFORMATIONS_API void mark_as_compression(const std::shared_ptr<Node>& node);

TRANSFORMATIONS_API void unmark_as_compression(const std::shared_ptr<Node>& node);

TRANSFORMATIONS_API bool is_compression(const std::shared_ptr<Node>& node);

/**
* @ingroup ie_runtime_attr_api
* @brief Decompression class represents runtime info attribute that marks operation
@@ -43,4 +49,19 @@ class TRANSFORMATIONS_API Decompression : public RuntimeAttribute {
}
};

/**
 * @ingroup ie_runtime_attr_api
 * @brief Compression class represents runtime info attribute that marks operation
 * as a compression (e.g. an f32->f16 Convert inserted to align mixed-precision types)
 */
class TRANSFORMATIONS_API Compression : public RuntimeAttribute {
public:
OPENVINO_RTTI("Compression", "0");

Compression() = default;

bool visit_attributes(AttributeVisitor& visitor) override {
return true;
}

bool is_copyable() const override {
return false;
}
};

} // namespace ov
src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp
@@ -11,6 +11,7 @@
#include "openvino/op/util/precision_sensitive_attribute.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"

Check warning on line 13 in src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp

View workflow job for this annotation

GitHub Actions / clang-format

[reviewdog-suggester] reported by reviewdog 🐶 Raw Output: src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp:13:-#include "transformations/rt_info/disable_fp16_compression.hpp"
#include "transformations/rt_info/decompression.hpp"

Check warning on line 15 in src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp

View workflow job for this annotation

GitHub Actions / clang-format

[reviewdog-suggester] reported by reviewdog 🐶 Raw Output: src/common/transformations/src/transformations/fp16_compression/align_mixed_fp32_fp16_types.cpp:14:+#include "transformations/rt_info/disable_fp16_compression.hpp"
using namespace ov;

@@ -48,6 +49,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M
copy_runtime_info(incoming_node, convert);
input.replace_source_output(convert);
disable_fp16_compression(convert);
mark_as_compression(convert);
pass::disable_constant_folding(convert);
is_changed = true;
}
@@ -76,6 +78,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M
auto init_name = node->get_friendly_name() + "_compressed_to_f16";
convert->set_friendly_name(generate_uniq_name(init_name));
out_inputs.replace_source_output(convert);
mark_as_compression(convert);
pass::disable_constant_folding(convert);
is_changed = true;
}
@@ -77,6 +77,32 @@ pass::KeepConstAndDecompression::KeepConstAndDecompression() {
register_matcher(m, callback);
}

pass::KeepConstFP32Unfolded::KeepConstFP32Unfolded() {
MATCHER_SCOPE(KeepConstFP32Unfolded);

auto node_pattern = pattern::wrap_type<ov::op::v0::MatMul>();

matcher_pass_callback callback = [=](pattern::Matcher& m) {
auto node = m.get_match_root();

if (transformation_callback(node)) {
return false;
}

auto constNode = node->get_input_node_shared_ptr(1);
if (!is_type<ov::op::v0::Constant>(constNode) || constNode->get_output_element_type(0) != element::f32)
return false;

disable_constant_folding(constNode);
enable_keep_const_precision(constNode);
disable_fp16_compression(constNode);

return false;
};
auto m = std::make_shared<pattern::Matcher>(node_pattern, matcher_name);
register_matcher(m, callback);
}
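
For context, a minimal sketch (not part of the commit) of running the new pass through the standard pass manager; the transformations header path is an assumption inferred from the declarations in this patch:

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
// Assumed location of the KeepConstFP32Unfolded declaration.
#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp"

void keep_matmul_weights_fp32(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    // ConstantFolding is disabled and f32->f16 compression is skipped for f32
    // MatMul weight Constants, so the CPU plugin can compress them itself.
    manager.register_pass<ov::pass::KeepConstFP32Unfolded>();
    manager.run_passes(model);
}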

pass::KeepConstantsPrecisionAndAddConverts::KeepConstantsPrecisionAndAddConverts() {
MATCHER_SCOPE(KeepConstantsPrecisionAndAddConverts);
auto const_pattern = pattern::wrap_type<ov::op::v0::Constant>();
src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp
@@ -432,16 +432,16 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
Manager manager(get_pass_config());
// Mark root of Division with eps pattern to keep in FP32
REGISTER_PASS(manager, MarkDivWithEps)
REGISTER_PASS(manager, MarkExpInReduceOpPath)
REGISTER_PASS(manager, PropagateDownDisableSensitivityForQuantized)

// both Up and Down propagations are needed.
// Why both of them are needed is explained in comments in passes declarations.
REGISTER_PASS(manager, PropagateDownMarkToKeepInMixedPrecision)

auto propagate_up = manager.register_pass<BackwardGraphRewrite>();
ADD_MATCHER(propagate_up, PropagateUpMarkToKeepInMixedPrecision)

// Mark nodes in ShapeOf subgraphs to keep in FP32
REGISTER_PASS(manager, MarkPrecisionSensitiveShapeOfSubgraphs)
manager.run_passes(m);
@@ -18,3 +18,18 @@ bool ov::is_decompression(const std::shared_ptr<Node>& node) {
const auto& rt_info = node->get_rt_info();
return rt_info.count(Decompression::get_type_info_static());
}

void ov::mark_as_compression(const std::shared_ptr<Node>& node) {
auto& rt_info = node->get_rt_info();
rt_info[Compression::get_type_info_static()] = Compression();
}

void ov::unmark_as_compression(const std::shared_ptr<Node>& node) {
auto& rt_info = node->get_rt_info();
rt_info.erase(Compression::get_type_info_static());
}

bool ov::is_compression(const std::shared_ptr<Node>& node) {
const auto& rt_info = node->get_rt_info();
return rt_info.count(Compression::get_type_info_static());
}
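
A minimal usage sketch (not part of the commit), mirroring the existing decompression helpers above; `convert` here stands for any f32->f16 Convert node created elsewhere:

// Tag an f32->f16 Convert so downstream passes can recognize it.
ov::mark_as_compression(convert);
if (ov::is_compression(convert)) {
    // e.g. the CPU plugin's ConvertMatMulToFC bypasses such Converts on weights
}
ov::unmark_as_compression(convert);  // erases the Compression attribute again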
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -921,8 +921,8 @@ void GraphOptimizer::FuseFCAndConvertOnWeights(Graph& graph) {
&& parent->getChildEdges().size() == 1
&& parent->getChildEdgeAt(0)->getOutputNum() == 1
&& parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected
&& one_of(parent->getOriginalInputPrecisionAtPort(0), Precision::FP16)
&& one_of(parent->getOriginalOutputPrecisionAtPort(0), Precision::FP32, Precision::BF16)
&& one_of(parent->getOriginalInputPrecisionAtPort(0), Precision::FP32, Precision::BF16, Precision::FP16)
&& one_of(parent->getOriginalOutputPrecisionAtPort(0), Precision::FP32, Precision::BF16, Precision::FP16)
&& parent->isConstant();
return res;
};
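
In effect, the relaxed predicate lets FuseFCAndConvertOnWeights fuse a constant-weight Convert between any of FP32, BF16 and FP16 into FullyConnected, not only the former FP16 -> FP32/BF16 decompression case; in particular it now covers the f32 -> f16 weight compression this commit moves into the plugin.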
@@ -37,7 +37,7 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() {
auto fc_input_b = pattern_map.at(weights_m);
bool is_convert = false;
if (auto convert_node = std::dynamic_pointer_cast<ov::op::v0::Convert>(fc_input_b.get_node_shared_ptr())) {
-            if (is_decompression(convert_node)) {
+            if (is_decompression(convert_node) || fp16_compression_is_disabled(convert_node) || is_compression(convert_node)) {
is_convert = true;
fc_input_b = convert_node->get_input_node_shared_ptr(0);
} else {
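
Besides decompression Converts, ConvertMatMulToFC now also strips Converts that carry the new Compression mark or that have fp16 compression disabled, so the resulting FullyConnected consumes the original f32 weight Constant directly.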
@@ -291,6 +291,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
// It cannot be static data, because it may be different for different inferencePrecision
const auto precisions = get_convert_precisions();
if (inferencePrecision == ov::element::f16) {
CPU_REGISTER_PASS_ARM(manager, ov::pass::KeepConstFP32Unfolded);
precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}};
type_to_fuse_map empty_fuse_map = {};
const bool keep_precision_sensitive_in_fp32 = true;
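
CPU_REGISTER_PASS_ARM registers the pass only in ARM builds, so on ARM with inferencePrecision == f16 the f32 MatMul weights stay unfolded through the common f32 -> f16 conversion and are compressed on the plugin side instead, as the commit title describes.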
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/thirdparty/onednn
