Commit

Shyrma fix (deeplearning4j#6930)
* provide intrinsic gnu gcc function for casting float -> float16

* f16c support for avx2 builds

* meh

* another var name

* check whether the correct functions are called for float <--> float16 casts (see the intrinsics sketch below)
Yurii Shyrma authored and printomi committed Jan 7, 2019
1 parent 38ac914 commit 2382615
Showing 4 changed files with 23 additions and 6 deletions.
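
For context, the hardware path this commit wires up comes down to two scalar F16C intrinsics from <immintrin.h>. A minimal standalone sketch, not part of the commit (file and helper names are illustrative):

// f16c_sketch.cpp -- build with: g++ -mf16c f16c_sketch.cpp
// Mirrors what float16.h now does behind
// #if defined(__INTEL_COMPILER) || defined(__F16C__).
#include <immintrin.h>
#include <cstdint>
#include <cstdio>

// half -> float: scalar form of VCVTPH2PS
static float half_to_float(uint16_t h) {
    return _cvtsh_ss(h);
}

// float -> half: scalar form of VCVTPS2PH; imm8 = 0 selects round-to-nearest
static uint16_t float_to_half(float f) {
    return _cvtss_sh(f, 0);
}

int main() {
    uint16_t h = float_to_half(1.5f);   // 1.5 is exactly representable in half
    printf("1.5f -> 0x%04x -> %f\n", h, half_to_float(h));
    return 0;
}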
2 changes: 1 addition & 1 deletion libnd4j/blas/CMakeLists.txt
@@ -51,7 +51,7 @@ IF(${ARCH} MATCHES "arm*")
 ELSEIF(${ARCH} MATCHES "power*")
     set(ARCH_TUNE "-mcpu=${ARCH} -mtune=${ARCH} -D__POWER")
 ELSEIF(${EXTENSION} MATCHES "avx2")
-    set(ARCH_TUNE "-march=${ARCH} -mtune=${ARCH} -msse4.1 -msse4.2 -mavx -mavx2 -mfma")
+    set(ARCH_TUNE "-march=${ARCH} -mtune=${ARCH} -msse4.1 -msse4.2 -mavx -mavx2 -mfma -mf16c -D__F16C__=true")
 ELSE()
     if (${ARCH} STREQUAL "x86-64")
         set(ARCH_TYPE "generic")
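A side note on the flags above: on GNU GCC and Clang, -mf16c already predefines the __F16C__ macro, so the explicit -D__F16C__=true is belt-and-braces there (the guards in float16.h below only test defined(__F16C__), never its value). A throwaway probe to confirm the flag reached the compiler, with a hypothetical file name:

// check_f16c.cpp -- refuses to compile unless F16C is enabled
// g++ -mf16c check_f16c.cpp
#if !defined(__F16C__)
#error "F16C not enabled; build with -mf16c"
#endif
int main() { return 0; }
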
8 changes: 6 additions & 2 deletions libnd4j/include/types/float16.h
@@ -27,6 +27,9 @@
 #include <iosfwd>
 #include <iostream>
 #include <pointercast.h>
+#if defined(__INTEL_COMPILER) || defined(__F16C__)
+#include <immintrin.h>
+#endif

 // support for half precision conversion
 #ifdef __INTEL_COMPILER
@@ -116,7 +119,7 @@ typedef __half ihalf;
 #include <fp16_emu.h>


-#ifdef __INTEL_COMPILER
+#if defined(__INTEL_COMPILER) || defined(__F16C__)
 //_Pragma("omp declare simd") inline
 local_def float cpu_ihalf2float(ihalf h) {
     return _cvtsh_ss(h.getX());
@@ -151,11 +154,12 @@ local_def float cpu_ihalf2float(ihalf h) {
 }
 #endif

-#ifdef __INTEL_COMPILER
+#if defined(__INTEL_COMPILER) || defined(__F16C__)
 //_Pragma("omp declare simd") inline
 local_def ihalf cpu_float2ihalf_rn(float f) {
     ihalf ret;
     ret.x = _cvtss_sh(f, 0);
+
     return ret;
 }

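The second argument to _cvtss_sh above is the rounding control; 0 selects round-to-nearest, which is what the _rn suffix of cpu_float2ihalf_rn promises. A standalone round-trip sanity check under the same assumptions (sketch, not part of the commit):

// roundtrip_sketch.cpp -- g++ -mf16c roundtrip_sketch.cpp
#include <immintrin.h>
#include <cassert>
#include <cstdint>

int main() {
    // values exactly representable in IEEE 754 half precision round-trip losslessly
    const float exact[] = {0.0f, 0.5f, 1.0f, 2.0f, -3.25f, 65504.0f};
    for (float f : exact) {
        uint16_t h = _cvtss_sh(f, 0);   // float -> half, round-to-nearest
        assert(_cvtsh_ss(h) == f);      // half -> float recovers the value exactly
    }
    // 0.1f is not representable in half, so the round trip loses precision
    assert(_cvtsh_ss(_cvtss_sh(0.1f, 0)) != 0.1f);
    return 0;
}
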
15 changes: 12 additions & 3 deletions libnd4j/tests_cpu/layers_tests/DataTypesValidationTests.cpp
@@ -55,9 +55,9 @@ TEST_F(DataTypesValidationTests, Basic_Test_1) {
 }

 TEST_F(DataTypesValidationTests, Basic_Test_2) {
-    auto input = NDArrayFactory::create<int8_t>('c', {1, 1, 1, 4});
-    auto weights = NDArrayFactory::create<float16>('c', {1, 1, 1, 4});
-    auto exp = NDArrayFactory::create<float16>('c', {1, 4, 1, 4}, {2., 4., 6., 8., 2., 4., 6., 8., 2., 4., 6., 8., 2., 4., 6., 8.});
+    auto input = NDArrayFactory::create<uint8_t>('c', {1, 1, 1, 4});
+    auto weights = NDArrayFactory::create<float>('c', {1, 1, 1, 4});
+    auto exp = NDArrayFactory::create<float>('c', {1, 4, 1, 4}, {2., 4., 6., 8., 2., 4., 6., 8., 2., 4., 6., 8., 2., 4., 6., 8.});

     weights.assign(2.0);
     input.linspace(1);
@@ -103,3 +103,12 @@ TEST_F(DataTypesValidationTests, Basic_Test_4) {
     auto result = op.execute({&input, &weights}, {&out}, {}, {1, 1, 1, 1, 0, 0, 1, 1, 0, 0}, {});
     ASSERT_EQ(ND4J_STATUS_VALIDATION, result);
 }
+
+TEST_F(DataTypesValidationTests, cast_1) {
+
+    float16 x = static_cast<float16>(1.f);
+    float y = static_cast<float16>(x);
+
+    ASSERT_TRUE(1.f == x);
+    ASSERT_TRUE(y == x);
+}
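
A note on how cast_1 exercises both directions: static_cast<float16>(1.f) runs the float -> half constructor, and although static_cast<float16>(x) on the second line is a no-op, binding its result to a float invokes float16's implicit conversion operator, i.e. the half -> float path. The same checks with the conversions spelled out might look like this (hypothetical test name; assumes only the converting constructor and conversion to float already used by the test above):

TEST_F(DataTypesValidationTests, cast_1_explicit) {
    float16 x(1.f);                     // float -> half (cpu_float2ihalf_rn on CPU)
    float   y = static_cast<float>(x);  // half -> float (cpu_ihalf2float on CPU)
    ASSERT_TRUE(x == 1.f);
    ASSERT_TRUE(y == 1.f);
}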
4 changes: 4 additions & 0 deletions libnd4j/tests_cpu/libnd4j_tests/CMakeLists.txt
@@ -44,6 +44,10 @@ else()
     else()
         set(CMAKE_CXX_FLAGS " -g -O0 -fPIC -std=c++11 -fassociative-math -funsafe-math-optimizations -fsanitize=address")
     endif()
+
+    if (${F16C})
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c -D__F16C__=true")
+    endif()
 endif()

 if ("${EXPERIMENTAL}" STREQUAL "yes")
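Because F16C is a build-time switch here, a test binary built with -mf16c will execute VCVTPS2PH/VCVTPH2PS unconditionally and die with SIGILL on a CPU without the extension, so whoever configures the build is responsible for passing F16C only on supporting hardware. If runtime detection were ever wanted, the feature bit is CPUID leaf 1, ECX bit 29; a detection sketch for GCC/Clang on x86 (not part of the commit):

// f16c_detect.cpp -- runtime check for the F16C extension
#include <cpuid.h>
#include <cstdio>

static bool cpu_has_f16c() {
    unsigned int eax, ebx, ecx, edx;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return false;                 // CPUID leaf 1 unsupported
    return (ecx & bit_F16C) != 0;     // CPUID.01H:ECX, bit 29
}

int main() {
    printf("F16C supported: %s\n", cpu_has_f16c() ? "yes" : "no");
    return 0;
}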
