-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add model compression to FP16 weights
- Loading branch information
Showing
21 changed files
with
502 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
...e/src/transformations/include/transformations/common_optimizations/compress_constants.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <transformations_visibility.hpp> | ||
#include <ngraph/pass/graph_rewrite.hpp> | ||
|
||
namespace ngraph { | ||
namespace pass { | ||
|
||
class TRANSFORMATIONS_API CompressConstants; | ||
|
||
} // namespace pass | ||
} // namespace ngraph | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief CompressConstants transformation replaces FP32 and FP64 Constants with FP16 ones | ||
* followed by a Convert back to the original element type. The inserted Convert is tagged | ||
* with the ov::Decompression rt_info attribute. Constants whose rt_info contains the | ||
* "DISABLE_FP16_COMPRESSION" key are left unchanged. | ||
*/ | ||
class ngraph::pass::CompressConstants : public ngraph::pass::MatcherPass { | ||
public: | ||
NGRAPH_RTTI_DECLARATION; | ||
CompressConstants(); | ||
}; |
26 changes: 26 additions & 0 deletions
26
...de/transformations/common_optimizations/disable_decomression_convert_constant_folding.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <transformations_visibility.hpp> | ||
#include <ngraph/pass/graph_rewrite.hpp> | ||
|
||
namespace ngraph { | ||
namespace pass { | ||
|
||
class TRANSFORMATIONS_API DisableDecompressionConvertConstantFolding; | ||
|
||
} // namespace pass | ||
} // namespace ngraph | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief Disables ConstantFolding for Convert operations that carry the ov::Decompression | ||
* rt_info attribute. Convert operations without that attribute are not modified. | ||
*/ | ||
class ngraph::pass::DisableDecompressionConvertConstantFolding : public ngraph::pass::MatcherPass { | ||
public: | ||
NGRAPH_RTTI_DECLARATION; | ||
DisableDecompressionConvertConstantFolding(); | ||
}; |
24 changes: 24 additions & 0 deletions
24
...tions/include/transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <ngraph/ngraph.hpp> | ||
#include <ngraph/pass/pass.hpp> | ||
|
||
#include <transformations_visibility.hpp> | ||
|
||
namespace ngraph { | ||
namespace pass { | ||
|
||
class TRANSFORMATIONS_API MarkPrecisionSensitiveSubgraphs; | ||
|
||
} // namespace pass | ||
} // namespace ngraph | ||
|
||
// Function-level pass; only the declaration is visible in this header. | ||
// NOTE(review): judging by the name, it presumably marks subgraphs whose constants must | ||
// keep their original precision (i.e. must not be compressed to FP16) - confirm against | ||
// the implementation of run_on_function. | ||
class ngraph::pass::MarkPrecisionSensitiveSubgraphs : public FunctionPass { | ||
public: | ||
NGRAPH_RTTI_DECLARATION; | ||
// Entry point invoked on function f; the meaning of the returned flag is defined by the | ||
// FunctionPass base class (presumably "function was modified" - TODO confirm). | ||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
31 changes: 31 additions & 0 deletions
31
inference-engine/src/transformations/include/transformations/rt_info/decompression.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <assert.h> | ||
#include <functional> | ||
#include <memory> | ||
#include <string> | ||
#include <set> | ||
|
||
#include <ngraph/node.hpp> | ||
#include <ngraph/variant.hpp> | ||
#include <transformations_visibility.hpp> | ||
|
||
|
||
namespace ov { | ||
|
||
// Value-less rt_info attribute (VariantImpl<void>) used purely as a marker. | ||
// CompressConstants attaches it to the Convert it inserts after an FP16 Constant, and | ||
// DisableDecompressionConvertConstantFolding looks for it on Convert nodes. | ||
class TRANSFORMATIONS_API Decompression : public VariantImpl<void> { | ||
public: | ||
// Registers the attribute under the type name "decompression", version "0". | ||
OPENVINO_RTTI("decompression", "0"); | ||
|
||
Decompression() = default; | ||
|
||
// The attribute has no data, so nothing is passed to the visitor; always reports success. | ||
bool visit_attributes(AttributeVisitor& visitor) override { return true; } | ||
|
||
// Reports the attribute as non-copyable. | ||
// NOTE(review): presumably this keeps rt_info copying from propagating the marker to | ||
// other nodes - confirm against the rt_info copy implementation. | ||
bool is_copyable() const override { return false; } | ||
}; | ||
|
||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 51 additions & 0 deletions
51
...ngine/src/transformations/src/transformations/common_optimizations/compress_constants.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "transformations/common_optimizations/compress_constants.hpp" | ||
|
||
#include <ngraph/opsets/opset8.hpp> | ||
#include <ngraph/rt_info.hpp> | ||
#include <ngraph/pattern/op/wrap_type.hpp> | ||
#include <transformations/rt_info/decompression.hpp> | ||
#include "itt.hpp" | ||
|
||
NGRAPH_RTTI_DEFINITION(ngraph::pass::CompressConstants, "CompressConstants", 0); | ||
|
||
// Registers a matcher over every opset8::Constant. | ||
// A matched f32/f64 Constant is replaced by an f16 Constant followed by a Convert back to | ||
// the original element type; the Convert is tagged with ov::Decompression. Constants of any | ||
// other element type, or with "DISABLE_FP16_COMPRESSION" in their rt_info, are skipped. | ||
ngraph::pass::CompressConstants::CompressConstants() { | ||
    MATCHER_SCOPE(CompressConstants); | ||
    auto const_pattern = ngraph::pattern::wrap_type<opset8::Constant>(); | ||
|
||
    ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { | ||
        const auto& pattern_map = m.get_pattern_value_map(); | ||
        const auto& matched_output = pattern_map.at(const_pattern); | ||
|
||
        // The pattern node and the matched Constant use distinct names so that the local | ||
        // does not shadow the lambda capture. | ||
        const auto const_node = std::dynamic_pointer_cast<ngraph::opset8::Constant>( | ||
            matched_output.get_node_shared_ptr()); | ||
        // dynamic_pointer_cast yields nullptr on a type mismatch; skip instead of dereferencing. | ||
        if (!const_node) | ||
            return false; | ||
|
||
        if (const_node->get_element_type() != ov::element::f32 && const_node->get_element_type() != ov::element::f64) | ||
            return false; | ||
|
||
        const auto& rt_info = const_node->get_rt_info(); | ||
        if (rt_info.count("DISABLE_FP16_COMPRESSION")) | ||
            return false; | ||
|
||
        // The temporary vector returned by cast_vector stays alive until the end of this | ||
        // full expression, i.e. for the whole Constant construction. | ||
        auto new_const = std::make_shared<ngraph::opset8::Constant>(ov::element::f16, | ||
                                                                    const_node->get_shape(), | ||
                                                                    const_node->cast_vector<float16>().data()); | ||
        // Convert back so that consumers still see the original element type. | ||
        auto convert = std::make_shared<ngraph::opset8::Convert>(new_const, const_node->get_element_type()); | ||
|
||
        convert->set_friendly_name(m.get_match_root()->get_friendly_name()); | ||
        ngraph::copy_runtime_info(const_node, convert); | ||
|
||
        // Tag the Convert as a decompression node. | ||
        auto& convert_rt_info = convert->get_rt_info(); | ||
        convert_rt_info[ov::Decompression::get_type_info_static()] = std::make_shared<ov::Decompression>(); | ||
|
||
        ngraph::replace_node(m.get_match_root(), convert); | ||
|
||
        return true; | ||
    }; | ||
|
||
    auto m = std::make_shared<ngraph::pattern::Matcher>(const_pattern, matcher_name); | ||
    this->register_matcher(m, callback); | ||
} |
30 changes: 30 additions & 0 deletions
30
...rc/transformations/common_optimizations/disable_decomression_convert_constant_folding.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "transformations/common_optimizations/disable_decomression_convert_constant_folding.hpp" | ||
|
||
#include <ngraph/opsets/opset8.hpp> | ||
#include <ngraph/pattern/op/wrap_type.hpp> | ||
#include <transformations/rt_info/disable_constant_folding.hpp> | ||
#include <transformations/rt_info/decompression.hpp> | ||
#include <itt.hpp> | ||
|
||
NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableDecompressionConvertConstantFolding, "DisableDecompressionConvertConstantFolding", 0); | ||
|
||
// Registers a matcher over every opset8::Convert. Constant folding is disabled for a | ||
// matched Convert only when its rt_info contains the ov::Decompression attribute. | ||
ngraph::pass::DisableDecompressionConvertConstantFolding::DisableDecompressionConvertConstantFolding() { | ||
    MATCHER_SCOPE(DisableDecompressionConvertConstantFolding); | ||
    auto convert_pattern = pattern::wrap_type<opset8::Convert>(); | ||
|
||
    ngraph::matcher_pass_callback on_match = [=](pattern::Matcher& matcher) { | ||
        const auto& matched = matcher.get_match_root(); | ||
        const bool is_decompression = | ||
            matched->get_rt_info().count(ov::Decompression::get_type_info_static()) != 0; | ||
        // Only Converts tagged as decompression are touched; the return value reports | ||
        // whether this callback changed anything. | ||
        if (is_decompression) | ||
            disable_constant_folding(matched); | ||
        return is_decompression; | ||
    }; | ||
|
||
    auto pass_matcher = std::make_shared<ngraph::pattern::Matcher>(convert_pattern, matcher_name); | ||
    this->register_matcher(pass_matcher, on_match); | ||
} |
Oops, something went wrong.