-
Notifications
You must be signed in to change notification settings - Fork 688
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
opencv: disable broken Intel FP16 detection
With opencv-3.2, the configuration detects if the target supports half-precision floating-point format. This fails to compile for some Intel targets such as skylake with an error such as: error: '_mm_cvtph_ps' was not declared in this scope The configuration used to work in opencv-3.1 so revert two commits to drop the FP16 detection even though it may make opencv slower. The only change in the configure log is: - FP16: Compiler support is available + FP16: Compiler support is not available The patch should be dropped when a newer version of opencv that includes commit: opencv/opencv@e5d9b60 is available. Backporting that fix was complicated. Signed-off-by: Randy MacLeod <Randy.MacLeod@windriver.com> Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
- Loading branch information
1 parent
296fb49
commit c931bc6
Showing
3 changed files
with
274 additions
and
0 deletions.
There are no files selected for viewing
27 changes: 27 additions & 0 deletions
27
meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
From 69f9707678190f6a0948a547dce948251f972676 Mon Sep 17 00:00:00 2001 | ||
From: Randy MacLeod <Randy.MacLeod@windriver.com> | ||
Date: Wed, 26 Apr 2017 14:57:30 -0400 | ||
Subject: [PATCH 1/2] Revert "cuda: fix fp16 compilation" | ||
|
||
This reverts commit 12e00827be40576b686ea4438a6e6ef85208743d. | ||
--- | ||
modules/core/include/opencv2/core/cvdef.h | 3 +-- | ||
1 file changed, 1 insertion(+), 2 deletions(-) | ||
|
||
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h | ||
index 699b166..efc24ca 100644 | ||
--- a/modules/core/include/opencv2/core/cvdef.h | ||
+++ b/modules/core/include/opencv2/core/cvdef.h | ||
@@ -303,8 +303,7 @@ enum CpuFeatures { | ||
#define CV_2PI 6.283185307179586476925286766559 | ||
#define CV_LOG2 0.69314718055994530941723212145818 | ||
|
||
-#if defined __ARM_FP16_FORMAT_IEEE \ | ||
- && !defined __CUDACC__ | ||
+#if defined (__ARM_FP16_FORMAT_IEEE) | ||
# define CV_FP16_TYPE 1 | ||
#else | ||
# define CV_FP16_TYPE 0 | ||
-- | ||
2.9.3 | ||
|
245 changes: 245 additions & 0 deletions
245
meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,245 @@ | ||
From 9108e39e5584ef9b41f80751639b4ec72b3e9538 Mon Sep 17 00:00:00 2001 | ||
From: Randy MacLeod <Randy.MacLeod@windriver.com> | ||
Date: Wed, 26 Apr 2017 15:00:32 -0400 | ||
Subject: [PATCH 2/2] Revert "check FP16 build condition correctly" | ||
|
||
This reverts commit c7cb116dc08441fe56cf82d5b21f929e5b674c13. | ||
|
||
Fix up revert conflicts to take previous behaviour. | ||
--- | ||
cmake/OpenCVCompilerOptions.cmake | 45 +++++++++-------------- | ||
modules/core/include/opencv2/core/cvdef.h | 2 +- | ||
modules/core/src/convert.cpp | 11 +++--- | ||
modules/core/test/test_intrin.cpp | 60 ++++++++++++++----------------- | ||
4 files changed, 48 insertions(+), 70 deletions(-) | ||
|
||
diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake | ||
index 5bb0479..4b19fdb 100644 | ||
--- a/cmake/OpenCVCompilerOptions.cmake | ||
+++ b/cmake/OpenCVCompilerOptions.cmake | ||
@@ -185,7 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) | ||
add_extra_compiler_option("-mfp16-format=ieee") | ||
endif(ARM) | ||
if(ENABLE_NEON) | ||
- add_extra_compiler_option("-mfpu=neon") | ||
+ add_extra_compiler_option("-mfpu=neon-fp16") | ||
endif() | ||
if(ENABLE_VFPV3 AND NOT ENABLE_NEON) | ||
add_extra_compiler_option("-mfpu=vfpv3") | ||
@@ -370,34 +370,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) | ||
add_extra_compiler_option(-fvisibility-inlines-hidden) | ||
endif() | ||
|
||
-if(NOT OPENCV_FP16_DISABLE AND NOT IOS) | ||
- if(ARM AND ENABLE_NEON) | ||
- set(FP16_OPTION "-mfpu=neon-fp16") | ||
- elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX) | ||
- set(FP16_OPTION "-mf16c") | ||
- endif() | ||
- try_compile(__VALID_FP16 | ||
- "${OpenCV_BINARY_DIR}" | ||
- "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" | ||
- COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" | ||
- OUTPUT_VARIABLE TRY_OUT | ||
- ) | ||
- if(NOT __VALID_FP16) | ||
- if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX) | ||
- # GCC enables AVX when mf16c is passed | ||
- message(STATUS "FP16: Feature disabled") | ||
- else() | ||
- message(STATUS "FP16: Compiler support is not available") | ||
- endif() | ||
- else() | ||
- message(STATUS "FP16: Compiler support is available") | ||
- set(HAVE_FP16 1) | ||
- if(NOT ${FP16_OPTION} STREQUAL "") | ||
- add_extra_compiler_option(${FP16_OPTION}) | ||
- endif() | ||
- endif() | ||
-endif() | ||
- | ||
#combine all "extra" options | ||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") | ||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") | ||
@@ -450,6 +422,21 @@ if(MSVC) | ||
endif() | ||
endif() | ||
|
||
+if(NOT OPENCV_FP16_DISABLE) | ||
+ try_compile(__VALID_FP16 | ||
+ "${OpenCV_BINARY_DIR}" | ||
+ "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" | ||
+ COMPILE_DEFINITIONS "-DCHECK_FP16" | ||
+ OUTPUT_VARIABLE TRY_OUT | ||
+ ) | ||
+ if(NOT __VALID_FP16) | ||
+ message(STATUS "FP16: Compiler support is not available") | ||
+ else() | ||
+ message(STATUS "FP16: Compiler support is available") | ||
+ set(HAVE_FP16 1) | ||
+ endif() | ||
+endif() | ||
+ | ||
if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib") | ||
link_directories("/usr/local/lib") | ||
endif() | ||
diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h | ||
index efc24ca..a10936b 100644 | ||
--- a/modules/core/include/opencv2/core/cvdef.h | ||
+++ b/modules/core/include/opencv2/core/cvdef.h | ||
@@ -312,7 +312,7 @@ enum CpuFeatures { | ||
typedef union Cv16suf | ||
{ | ||
short i; | ||
-#if CV_FP16_TYPE | ||
+#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) | ||
__fp16 h; | ||
#endif | ||
struct _fp16Format | ||
diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp | ||
index e04d89e..46db26f 100644 | ||
--- a/modules/core/src/convert.cpp | ||
+++ b/modules/core/src/convert.cpp | ||
@@ -44,7 +44,6 @@ | ||
#include "precomp.hpp" | ||
|
||
#include "opencl_kernels_core.hpp" | ||
-#include "opencv2/core/hal/intrin.hpp" | ||
|
||
#include "opencv2/core/openvx/ovx_defs.hpp" | ||
|
||
@@ -4382,7 +4381,7 @@ struct Cvt_SIMD<float, int> | ||
|
||
#endif | ||
|
||
-#if !CV_FP16_TYPE | ||
+#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) ) | ||
// const numbers for floating points format | ||
const unsigned int kShiftSignificand = 13; | ||
const unsigned int kMaskFp16Significand = 0x3ff; | ||
@@ -4390,7 +4389,7 @@ const unsigned int kBiasFp16Exponent = 15; | ||
const unsigned int kBiasFp32Exponent = 127; | ||
#endif | ||
|
||
-#if CV_FP16_TYPE | ||
+#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) | ||
static float convertFp16SW(short fp16) | ||
{ | ||
// Fp16 -> Fp32 | ||
@@ -4452,7 +4451,7 @@ static float convertFp16SW(short fp16) | ||
} | ||
#endif | ||
|
||
-#if CV_FP16_TYPE | ||
+#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) | ||
static short convertFp16SW(float fp32) | ||
{ | ||
// Fp32 -> Fp16 | ||
@@ -4560,7 +4559,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t | ||
if ( ( (intptr_t)dst & 0xf ) == 0 ) | ||
#endif | ||
{ | ||
-#if CV_FP16 && CV_SIMD128 | ||
+#if CV_FP16 | ||
for ( ; x <= size.width - 4; x += 4) | ||
{ | ||
v_float32x4 v_src = v_load(src + x); | ||
@@ -4606,7 +4605,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t | ||
if ( ( (intptr_t)src & 0xf ) == 0 ) | ||
#endif | ||
{ | ||
-#if CV_FP16 && CV_SIMD128 | ||
+#if CV_FP16 | ||
for ( ; x <= size.width - 4; x += 4) | ||
{ | ||
v_float16x4 v_src = v_load_f16(src + x); | ||
diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp | ||
index 66b2083..7349d48 100644 | ||
--- a/modules/core/test/test_intrin.cpp | ||
+++ b/modules/core/test/test_intrin.cpp | ||
@@ -729,56 +729,48 @@ template<typename R> struct TheTest | ||
return *this; | ||
} | ||
|
||
+#if CV_FP16 | ||
TheTest & test_loadstore_fp16() | ||
{ | ||
-#if CV_FP16 | ||
AlignedData<R> data; | ||
AlignedData<R> out; | ||
|
||
- if(checkHardwareSupport(CV_CPU_FP16)) | ||
- { | ||
- // check if addresses are aligned and unaligned respectively | ||
- EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); | ||
- EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); | ||
- EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); | ||
- EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); | ||
- | ||
- // check some initialization methods | ||
- R r1 = data.u; | ||
- R r2 = v_load_f16(data.a.d); | ||
- R r3(r2); | ||
- EXPECT_EQ(data.u[0], r1.get0()); | ||
- EXPECT_EQ(data.a[0], r2.get0()); | ||
- EXPECT_EQ(data.a[0], r3.get0()); | ||
- | ||
- // check some store methods | ||
- out.a.clear(); | ||
- v_store_f16(out.a.d, r1); | ||
- EXPECT_EQ(data.a, out.a); | ||
- } | ||
+ // check if addresses are aligned and unaligned respectively | ||
+ EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); | ||
+ EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); | ||
+ EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); | ||
+ EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); | ||
+ | ||
+ // check some initialization methods | ||
+ R r1 = data.u; | ||
+ R r2 = v_load_f16(data.a.d); | ||
+ R r3(r2); | ||
+ EXPECT_EQ(data.u[0], r1.get0()); | ||
+ EXPECT_EQ(data.a[0], r2.get0()); | ||
+ EXPECT_EQ(data.a[0], r3.get0()); | ||
+ | ||
+ // check some store methods | ||
+ out.a.clear(); | ||
+ v_store_f16(out.a.d, r1); | ||
+ EXPECT_EQ(data.a, out.a); | ||
|
||
return *this; | ||
-#endif | ||
} | ||
|
||
TheTest & test_float_cvt_fp16() | ||
{ | ||
-#if CV_FP16 | ||
AlignedData<v_float32x4> data; | ||
|
||
- if(checkHardwareSupport(CV_CPU_FP16)) | ||
- { | ||
- // check conversion | ||
- v_float32x4 r1 = v_load(data.a.d); | ||
- v_float16x4 r2 = v_cvt_f16(r1); | ||
- v_float32x4 r3 = v_cvt_f32(r2); | ||
- EXPECT_EQ(0x3c00, r2.get0()); | ||
- EXPECT_EQ(r3.get0(), r1.get0()); | ||
- } | ||
+ // check conversion | ||
+ v_float32x4 r1 = v_load(data.a.d); | ||
+ v_float16x4 r2 = v_cvt_f16(r1); | ||
+ v_float32x4 r3 = v_cvt_f32(r2); | ||
+ EXPECT_EQ(0x3c00, r2.get0()); | ||
+ EXPECT_EQ(r3.get0(), r1.get0()); | ||
|
||
return *this; | ||
-#endif | ||
} | ||
+#endif | ||
|
||
}; | ||
|
||
-- | ||
2.9.3 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters