Skip to content

Commit

Permalink
GNA MTL mixed precision (#13166)
Browse files Browse the repository at this point in the history
* [GNA] Enable int8 weights for convolutions

* [GNA] Implement convolution low precision tests for MTL

Co-authored-by: Marcin Kacprzak <marcin.kacprzak@intel.com>
  • Loading branch information
sirzabek and marcinkacprzak committed Nov 8, 2022
1 parent 0fa1573 commit 3ea77b9
Show file tree
Hide file tree
Showing 39 changed files with 2,501 additions and 2,214 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_gna/src/backend/make_pwl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "gna_slope_scale.h"
#include "dnn_types.h"
#include "backend/gna_types.h"
#include "round_float_define.hpp"
#include "common/numerical_utils.hpp"
#include "pwl_input_params.hpp"
#include "pwl_segments_creator_factory.hpp"
#include "log/log.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#include "log/debug.hpp"
#include "pwl_input_params.hpp"
#include "round_float_define.hpp"
#include "common/numerical_utils.hpp"

namespace ov {
namespace intel_gna {
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gna/src/backend/pwl_tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "pwl_tools.hpp"

#include "gna_slope_scale.h"
#include "round_float_define.hpp"
#include "common/numerical_utils.hpp"
#include "runtime/pwl.h"

namespace ov {
Expand Down
31 changes: 31 additions & 0 deletions src/plugins/intel_gna/src/common/numerical_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <algorithm>
#include <cmath>
#include <cstdlib>

namespace ov {
namespace intel_gna {
namespace common {

// Float -> fixed-width integer conversions that round half away from zero:
// a half carrying the sign of the argument is added, then static_cast truncates
// toward zero.
// NOTE: the argument expression is expanded (and evaluated) twice, so it must
// not have side effects. No saturation is performed; the value must already
// fit in the destination type.
#define FLOAT_TO_INT8(a)  static_cast<int8_t>((a) + (((a) < 0) ? -0.5f : 0.5f))
#define FLOAT_TO_INT16(a) static_cast<int16_t>((a) + (((a) < 0) ? -0.5f : 0.5f))
#define FLOAT_TO_INT32(a) static_cast<int32_t>((a) + (((a) < 0) ? -0.5f : 0.5f))
#define FLOAT_TO_INT64(a) static_cast<int64_t>((a) + (((a) < 0) ? -0.5f : 0.5f))

/**
 * @brief Checks whether two float values are equal within a relative tolerance
 * @details Exactly equal values (including same-signed infinities and +0/-0)
 *          always compare equal. NaN and any other non-finite operand never
 *          compare equal to a different value. Otherwise the absolute
 *          difference must not exceed @p accuracy scaled by the smaller of the
 *          two magnitudes, so a non-zero value is never considered equal to
 *          zero.
 * @param p1 First float value
 * @param p2 Second float value
 * @param accuracy Relative tolerance applied to min(|p1|, |p2|)
 * @return Returns true if the two float values are equal within the tolerance
 */
inline bool fp32eq(float p1, float p2, float accuracy = 0.00001f) {
    // Bit-for-bit equal values need no tolerance; this also makes inf == inf hold,
    // which the subtraction below cannot express (inf - inf is NaN).
    if (p1 == p2) {
        return true;
    }

    // Remaining non-finite cases (NaN, or an infinity against anything else) are
    // never equal; without this guard +inf vs -inf would pass as inf <= inf.
    if (!std::isfinite(p1) || !std::isfinite(p2)) {
        return false;
    }

    const float difference = std::abs(p1 - p2);
    const float tolerance = accuracy * std::min(std::abs(p1), std::abs(p2));
    return difference <= tolerance;
}

} // namespace common
} // namespace intel_gna
} // namespace ov

0 comments on commit 3ea77b9

Please sign in to comment.