Skip to content

Commit

Permalink
Arch tests and fixes for multi arch DFT
Browse files Browse the repository at this point in the history
  • Loading branch information
dancazarin committed Mar 24, 2020
1 parent a5f9f83 commit 25eceff
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 47 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Expand Up @@ -196,6 +196,7 @@ function (add_arch_library NAME ARCH SRCS DEFS)
target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH}) target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH})
target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS}) target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS})
target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH}) target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH})
target_compile_options(${NAME}_${ARCH} PRIVATE -flto)
endfunction () endfunction ()


if (ENABLE_DFT) if (ENABLE_DFT)
Expand Down
6 changes: 3 additions & 3 deletions azure-pipelines.yml
Expand Up @@ -118,7 +118,7 @@ jobs:
/bin/bash -c "sudo xcode-select -s /Applications/Xcode_$(XCODE_VER).app/Contents/Developer" /bin/bash -c "sudo xcode-select -s /Applications/Xcode_$(XCODE_VER).app/Contents/Developer"
brew install ninja brew install ninja
ci/run.sh build-release -DENABLE_CAPI_BUILD=ON -DUSE_SDE=ON -DARCH_TESTS=sse2,ssse3,sse41,avx,avx2,avx512 -DCMAKE_BUILD_TYPE=Release ci/run.sh build-release -DENABLE_CAPI_BUILD=ON -DUSE_SDE=ON -DARCH_TESTS=sse2,ssse3,sse41,avx,avx2,avx512 -DCPU_ARCH=sse2 -DENABLE_DFT_MULTIARCH=ON -DCMAKE_BUILD_TYPE=Release
- job: iOS_ARM_Clang_Release - job: iOS_ARM_Clang_Release
timeoutInMinutes: 120 timeoutInMinutes: 120
Expand Down Expand Up @@ -217,7 +217,7 @@ jobs:
set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=% set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=%
set PATH=%PATH:C:\Strawberry\c\bin;=% set PATH=%PATH:C:\Strawberry\c\bin;=%
set PATH=C:\sde;%PATH% set PATH=C:\sde;%PATH%
ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=avx512 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release
- job: Windows_MSVC_x86_AVX512_Clang9_Release - job: Windows_MSVC_x86_AVX512_Clang9_Release
timeoutInMinutes: 120 timeoutInMinutes: 120
Expand All @@ -236,7 +236,7 @@ jobs:
set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=% set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=%
set PATH=%PATH:C:\Strawberry\c\bin;=% set PATH=%PATH:C:\Strawberry\c\bin;=%
set PATH=C:\sde;%PATH% set PATH=C:\sde;%PATH%
ci\run.cmd build-release -DARCH_TESTS=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=avx512 -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_BUILD_TYPE=Release ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_BUILD_TYPE=Release
- job: Windows_MSVC_x86_64_AVX512_MSVC2017_Release - job: Windows_MSVC_x86_64_AVX512_MSVC2017_Release
timeoutInMinutes: 120 timeoutInMinutes: 120
Expand Down
111 changes: 70 additions & 41 deletions include/kfr/dft/impl/ft.hpp
Expand Up @@ -483,7 +483,7 @@ constexpr KFR_INTRINSIC cvec<T, width> fixed_twiddle(size_t size, size_t start,
// constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>(); // constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>();


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
constexpr KFR_INTRINSIC cvec<T, N> twiddleimagmask() constexpr static inline cvec<T, N> twiddleimagmask()
{ {
return inverse ? broadcast<N * 2, T>(-1, +1) : broadcast<N * 2, T>(+1, -1); return inverse ? broadcast<N * 2, T>(-1, +1) : broadcast<N * 2, T>(+1, -1);
} }
Expand Down Expand Up @@ -1022,11 +1022,16 @@ KFR_INTRINSIC void apply_twiddles2(cvec<T, N>& a1)
} }


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw3r1 = static_cast<T>(-0.5 - 1.0); static constexpr KFR_INTRINSIC cvec<T, N> tw3r1()
{
return static_cast<T>(-0.5 - 1.0);
}


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw3i1 = static constexpr KFR_INTRINSIC cvec<T, N> tw3i1()
static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>(); {
return static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>();
}


template <size_t N, bool inverse = false, typename T> template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N>& w00, KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N>& w00,
Expand All @@ -1037,9 +1042,9 @@ KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv
const cvec<T, N> dif1 = swap<2>(a01 - a02); const cvec<T, N> dif1 = swap<2>(a01 - a02);
w00 = a00 + sum1; w00 = a00 + sum1;


const cvec<T, N> s1 = w00 + sum1 * tw3r1<T, N, inverse>; const cvec<T, N> s1 = w00 + sum1 * tw3r1<T, N, inverse>();


const cvec<T, N> d1 = dif1 * tw3i1<T, N, inverse>; const cvec<T, N> d1 = dif1 * tw3i1<T, N, inverse>();


w01 = s1 + d1; w01 = s1 + d1;
w02 = s1 - d1; w02 = s1 - d1;
Expand Down Expand Up @@ -1132,25 +1137,40 @@ KFR_INTRINSIC void butterfly9(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cv
} }


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7r1 = static_cast<T>(0.623489801858733530525004884 - 1.0); static constexpr KFR_INTRINSIC cvec<T, N> tw7r1()
{
return static_cast<T>(0.623489801858733530525004884 - 1.0);
}


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7i1 = static constexpr KFR_INTRINSIC cvec<T, N> tw7i1()
static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>(); {
return static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>();
}


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7r2 = static_cast<T>(-0.2225209339563144042889025645 - 1.0); static constexpr KFR_INTRINSIC cvec<T, N> tw7r2()
{
return static_cast<T>(-0.2225209339563144042889025645 - 1.0);
}


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7i2 = static constexpr KFR_INTRINSIC cvec<T, N> tw7i2()
static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>(); {
return static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>();
}


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7r3 = static_cast<T>(-0.90096886790241912623610231951 - 1.0); static constexpr KFR_INTRINSIC cvec<T, N> tw7r3()
{
return static_cast<T>(-0.90096886790241912623610231951 - 1.0);
}


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7i3 = static constexpr KFR_INTRINSIC cvec<T, N> tw7i3()
static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>(); {
return static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>();
}


template <size_t N, bool inverse = false, typename T> template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04, KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
Expand All @@ -1167,18 +1187,18 @@ KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv
w00 = a00 + sum1 + sum2 + sum3; w00 = a00 + sum1 + sum2 + sum3;


const cvec<T, N> s1 = const cvec<T, N> s1 =
w00 + sum1 * tw7r1<T, N, inverse> + sum2 * tw7r2<T, N, inverse> + sum3 * tw7r3<T, N, inverse>; w00 + sum1 * tw7r1<T, N, inverse>() + sum2 * tw7r2<T, N, inverse>() + sum3 * tw7r3<T, N, inverse>();
const cvec<T, N> s2 = const cvec<T, N> s2 =
w00 + sum1 * tw7r2<T, N, inverse> + sum2 * tw7r3<T, N, inverse> + sum3 * tw7r1<T, N, inverse>; w00 + sum1 * tw7r2<T, N, inverse>() + sum2 * tw7r3<T, N, inverse>() + sum3 * tw7r1<T, N, inverse>();
const cvec<T, N> s3 = const cvec<T, N> s3 =
w00 + sum1 * tw7r3<T, N, inverse> + sum2 * tw7r1<T, N, inverse> + sum3 * tw7r2<T, N, inverse>; w00 + sum1 * tw7r3<T, N, inverse>() + sum2 * tw7r1<T, N, inverse>() + sum3 * tw7r2<T, N, inverse>();


const cvec<T, N> d1 = const cvec<T, N> d1 =
dif1 * tw7i1<T, N, inverse> + dif2 * tw7i2<T, N, inverse> + dif3 * tw7i3<T, N, inverse>; dif1 * tw7i1<T, N, inverse>() + dif2 * tw7i2<T, N, inverse>() + dif3 * tw7i3<T, N, inverse>();
const cvec<T, N> d2 = const cvec<T, N> d2 =
dif1 * tw7i2<T, N, inverse> - dif2 * tw7i3<T, N, inverse> - dif3 * tw7i1<T, N, inverse>; dif1 * tw7i2<T, N, inverse>() - dif2 * tw7i3<T, N, inverse>()- dif3 * tw7i1<T, N, inverse>();
const cvec<T, N> d3 = const cvec<T, N> d3 =
dif1 * tw7i3<T, N, inverse> - dif2 * tw7i1<T, N, inverse> + dif3 * tw7i2<T, N, inverse>; dif1 * tw7i3<T, N, inverse>() - dif2 * tw7i1<T, N, inverse>() + dif3 * tw7i2<T, N, inverse>();


w01 = s1 + d1; w01 = s1 + d1;
w06 = s1 - d1; w06 = s1 - d1;
Expand Down Expand Up @@ -1294,15 +1314,25 @@ KFR_INTRINSIC void butterfly11(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, c
} }


template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
const static cvec<T, N> tw5r1 = static_cast<T>(0.30901699437494742410229341718 - 1.0); static constexpr KFR_INTRINSIC cvec<T, N> tw5r1()
{
return static_cast<T>(0.30901699437494742410229341718 - 1.0);
}
template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
const static cvec<T, N> tw5i1 = static constexpr KFR_INTRINSIC cvec<T, N> tw5i1()
static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>(); {
return static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>();
}
template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
const static cvec<T, N> tw5r2 = static_cast<T>(-0.80901699437494742410229341718 - 1.0); static constexpr KFR_INTRINSIC cvec<T, N> tw5r2()
{
return static_cast<T>(-0.80901699437494742410229341718 - 1.0);
}
template <typename T, size_t N, bool inverse> template <typename T, size_t N, bool inverse>
const static cvec<T, N> tw5i2 = static constexpr KFR_INTRINSIC cvec<T, N> tw5i2()
static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>(); {
return static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>();
}


template <size_t N, bool inverse = false, typename T> template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, const cvec<T, N>& a02, KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, const cvec<T, N>& a02,
Expand All @@ -1315,11 +1345,11 @@ KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, cons
const cvec<T, N> dif2 = swap<2>(a02 - a03); const cvec<T, N> dif2 = swap<2>(a02 - a03);
w00 = a00 + sum1 + sum2; w00 = a00 + sum1 + sum2;


const cvec<T, N> s1 = w00 + sum1 * tw5r1<T, N, inverse> + sum2 * tw5r2<T, N, inverse>; const cvec<T, N> s1 = w00 + sum1 * tw5r1<T, N, inverse>() + sum2 * tw5r2<T, N, inverse>();
const cvec<T, N> s2 = w00 + sum1 * tw5r2<T, N, inverse> + sum2 * tw5r1<T, N, inverse>; const cvec<T, N> s2 = w00 + sum1 * tw5r2<T, N, inverse>() + sum2 * tw5r1<T, N, inverse>();


const cvec<T, N> d1 = dif1 * tw5i1<T, N, inverse> + dif2 * tw5i2<T, N, inverse>; const cvec<T, N> d1 = dif1 * tw5i1<T, N, inverse>() + dif2 * tw5i2<T, N, inverse>();
const cvec<T, N> d2 = dif1 * tw5i2<T, N, inverse> - dif2 * tw5i1<T, N, inverse>; const cvec<T, N> d2 = dif1 * tw5i2<T, N, inverse>() - dif2 * tw5i1<T, N, inverse>();


w01 = s1 + d1; w01 = s1 + d1;
w04 = s1 - d1; w04 = s1 - d1;
Expand Down Expand Up @@ -1690,16 +1720,15 @@ template <typename T, bool inverse, typename Tstride = csize_t<1>>
KFR_INTRINSIC void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in, KFR_INTRINSIC void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in,
complex<T>*, const complex<T>* twiddle, Tstride ostride = {}) complex<T>*, const complex<T>* twiddle, Tstride ostride = {})
{ {
cswitch( cswitch(csizes_t<11, 13>(), radix,
csizes_t<11, 13>(), radix, [&](auto radix_) CMT_INLINE_LAMBDA {
[&](auto radix_) CMT_INLINE_LAMBDA { constexpr size_t width = vector_width<T>;
constexpr size_t width = vector_width<T>; spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride);
spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride); },
}, [&]() CMT_INLINE_LAMBDA {
[&]() CMT_INLINE_LAMBDA { constexpr size_t width = vector_width<T>;
constexpr size_t width = vector_width<T>; generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride);
generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride); });
});
} }


template <typename T, size_t N> template <typename T, size_t N>
Expand Down
5 changes: 2 additions & 3 deletions tests/CMakeLists.txt
Expand Up @@ -253,8 +253,7 @@ if (NOT SKIP_TESTS)
${PROJECT_BINARY_DIR}/bin/all_tests_${A}) ${PROJECT_BINARY_DIR}/bin/all_tests_${A})
endif () endif ()
endforeach () endforeach ()
else ()
add_test(NAME all_tests COMMAND ${EMULATOR}
${PROJECT_BINARY_DIR}/bin/all_tests)
endif () endif ()
add_test(NAME all_tests COMMAND ${SDE} ${SDE_ARCH_${CPU_ARCH}} -chip_check_exe_only
-- ${PROJECT_BINARY_DIR}/bin/all_tests)
endif () endif ()

0 comments on commit 25eceff

Please sign in to comment.