Skip to content
Permalink
Browse files

Arch tests and fixes for multi arch DFT

  • Loading branch information
dlevin256 committed Mar 24, 2020
1 parent a5f9f83 commit 25eceffebe4b2dc4520b4926d7323551b0928691
Showing with 76 additions and 47 deletions.
  1. +1 −0 CMakeLists.txt
  2. +3 −3 azure-pipelines.yml
  3. +70 −41 include/kfr/dft/impl/ft.hpp
  4. +2 −3 tests/CMakeLists.txt
@@ -196,6 +196,7 @@ function (add_arch_library NAME ARCH SRCS DEFS)
target_set_arch(${NAME}_${ARCH} PRIVATE ${ARCH})
target_compile_options(${NAME}_${ARCH} PRIVATE ${DEFS})
target_link_libraries(${NAME}_all INTERFACE ${NAME}_${ARCH})
target_compile_options(${NAME}_${ARCH} PRIVATE -flto)
endfunction ()

if (ENABLE_DFT)
@@ -118,7 +118,7 @@ jobs:
/bin/bash -c "sudo xcode-select -s /Applications/Xcode_$(XCODE_VER).app/Contents/Developer"
brew install ninja
ci/run.sh build-release -DENABLE_CAPI_BUILD=ON -DUSE_SDE=ON -DARCH_TESTS=sse2,ssse3,sse41,avx,avx2,avx512 -DCMAKE_BUILD_TYPE=Release
ci/run.sh build-release -DENABLE_CAPI_BUILD=ON -DUSE_SDE=ON -DARCH_TESTS=sse2,ssse3,sse41,avx,avx2,avx512 -DCPU_ARCH=sse2 -DENABLE_DFT_MULTIARCH=ON -DCMAKE_BUILD_TYPE=Release
- job: iOS_ARM_Clang_Release
timeoutInMinutes: 120
@@ -217,7 +217,7 @@ jobs:
set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=%
set PATH=%PATH:C:\Strawberry\c\bin;=%
set PATH=C:\sde;%PATH%
ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=avx512 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release
ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m64 -DCMAKE_BUILD_TYPE=Release
- job: Windows_MSVC_x86_AVX512_Clang9_Release
timeoutInMinutes: 120
@@ -236,7 +236,7 @@ jobs:
set PATH=%PATH:C:\Program Files\Git\mingw64\bin;=%
set PATH=%PATH:C:\Strawberry\c\bin;=%
set PATH=C:\sde;%PATH%
ci\run.cmd build-release -DARCH_TESTS=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=avx512 -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_BUILD_TYPE=Release
ci\run.cmd build-release -DENABLE_CAPI_BUILD=ON -DARCH_TESTS=ON -DENABLE_DFT_MULTIARCH=ON -DCMAKE_CXX_COMPILER="C:/Program Files/LLVM/bin/clang-cl.exe" -DUSE_SDE=ON -DCPU_ARCH=sse2 -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_BUILD_TYPE=Release
- job: Windows_MSVC_x86_64_AVX512_MSVC2017_Release
timeoutInMinutes: 120
@@ -483,7 +483,7 @@ constexpr KFR_INTRINSIC cvec<T, width> fixed_twiddle(size_t size, size_t start,
// constexpr cvec<T, N> fixed_twiddle = get_fixed_twiddle<T, N, size, start, step, inverse>();

template <typename T, size_t N, bool inverse>
constexpr KFR_INTRINSIC cvec<T, N> twiddleimagmask()
// Sign mask that the tw*i* helpers in this file multiply their imaginary twiddle constants by.
// Selects broadcast<N * 2, T>(-1, +1) when `inverse` is true and broadcast<N * 2, T>(+1, -1)
// otherwise, so the two transform directions differ only in which value of the pair is negated.
// NOTE(review): broadcast presumably repeats the two-value pattern over all N * 2 scalar
// lanes - confirm against its definition.
constexpr static inline cvec<T, N> twiddleimagmask()
{
return inverse ? broadcast<N * 2, T>(-1, +1) : broadcast<N * 2, T>(+1, -1);
}
@@ -1022,11 +1022,16 @@ KFR_INTRINSIC void apply_twiddles2(cvec<T, N>& a1)
}

template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw3r1 = static_cast<T>(-0.5 - 1.0);
// Function form of the tw3r1 constant used by butterfly3.
// Returns (-0.5 - 1.0) cast to T, implicitly converted to cvec<T, N>; -0.5 equals cos(2*pi/3).
// The extra -1.0 is there because butterfly3 adds sum1 * tw3r1() to w00, and w00 already
// contains sum1 once. The `inverse` parameter is not used by this helper.
// NOTE(review): the scalar-to-cvec conversion presumably fills every lane with the value - confirm.
static constexpr KFR_INTRINSIC cvec<T, N> tw3r1()
{
return static_cast<T>(-0.5 - 1.0);
}

template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw3i1 =
static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>();
// Function form of the tw3i1 constant used by butterfly3 (multiplied with dif1).
// Returns 0.866025... (numerically sin(2*pi/3) = sqrt(3)/2) cast to T and multiplied by
// twiddleimagmask<T, N, inverse>(), so the sign pattern depends on `inverse`.
static constexpr KFR_INTRINSIC cvec<T, N> tw3i1()
{
return static_cast<T>(0.86602540378443864676372317075) * twiddleimagmask<T, N, inverse>();
}

template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N>& w00,
@@ -1037,9 +1042,9 @@ KFR_INTRINSIC void butterfly3(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv
const cvec<T, N> dif1 = swap<2>(a01 - a02);
w00 = a00 + sum1;

const cvec<T, N> s1 = w00 + sum1 * tw3r1<T, N, inverse>;
const cvec<T, N> s1 = w00 + sum1 * tw3r1<T, N, inverse>();

const cvec<T, N> d1 = dif1 * tw3i1<T, N, inverse>;
const cvec<T, N> d1 = dif1 * tw3i1<T, N, inverse>();

w01 = s1 + d1;
w02 = s1 - d1;
@@ -1132,25 +1137,40 @@ KFR_INTRINSIC void butterfly9(cvec<T, N>& a0, cvec<T, N>& a1, cvec<T, N>& a2, cv
}

template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7r1 = static_cast<T>(0.623489801858733530525004884 - 1.0);
// Function form of the tw7r1 constant used by butterfly7.
// Returns (0.623489... - 1.0) cast to T, implicitly converted to cvec<T, N>;
// 0.623489... equals cos(2*pi/7). The -1.0 offsets the fact that butterfly7 adds the
// weighted sums to w00, which already contains sum1 + sum2 + sum3.
// The `inverse` parameter is not used by this helper.
static constexpr KFR_INTRINSIC cvec<T, N> tw7r1()
{
return static_cast<T>(0.623489801858733530525004884 - 1.0);
}

template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7i1 =
static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>();
// Function form of the tw7i1 constant used by butterfly7 on the dif1/dif2/dif3 terms.
// Returns 0.781831... (numerically sin(2*pi/7)) cast to T and multiplied by
// twiddleimagmask<T, N, inverse>(), so the sign pattern depends on `inverse`.
static constexpr KFR_INTRINSIC cvec<T, N> tw7i1()
{
return static_cast<T>(0.78183148246802980870844452667) * twiddleimagmask<T, N, inverse>();
}

template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7r2 = static_cast<T>(-0.2225209339563144042889025645 - 1.0);
// Function form of the tw7r2 constant used by butterfly7.
// Returns (-0.222520... - 1.0) cast to T, implicitly converted to cvec<T, N>;
// -0.222520... equals cos(4*pi/7). The -1.0 offsets the sums already folded into w00
// by butterfly7. The `inverse` parameter is not used by this helper.
static constexpr KFR_INTRINSIC cvec<T, N> tw7r2()
{
return static_cast<T>(-0.2225209339563144042889025645 - 1.0);
}

template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7i2 =
static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>();
// Function form of the tw7i2 constant used by butterfly7 on the dif1/dif2/dif3 terms.
// Returns 0.974927... (numerically sin(4*pi/7)) cast to T and multiplied by
// twiddleimagmask<T, N, inverse>(), so the sign pattern depends on `inverse`.
static constexpr KFR_INTRINSIC cvec<T, N> tw7i2()
{
return static_cast<T>(0.97492791218182360701813168299) * twiddleimagmask<T, N, inverse>();
}

template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7r3 = static_cast<T>(-0.90096886790241912623610231951 - 1.0);
// Function form of the tw7r3 constant used by butterfly7.
// Returns (-0.900968... - 1.0) cast to T, implicitly converted to cvec<T, N>;
// -0.900968... equals cos(6*pi/7). The -1.0 offsets the sums already folded into w00
// by butterfly7. The `inverse` parameter is not used by this helper.
static constexpr KFR_INTRINSIC cvec<T, N> tw7r3()
{
return static_cast<T>(-0.90096886790241912623610231951 - 1.0);
}

template <typename T, size_t N, bool inverse>
static const cvec<T, N> tw7i3 =
static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>();
// Function form of the tw7i3 constant used by butterfly7 on the dif1/dif2/dif3 terms.
// Returns 0.433883... (numerically sin(6*pi/7)) cast to T and multiplied by
// twiddleimagmask<T, N, inverse>(), so the sign pattern depends on `inverse`.
static constexpr KFR_INTRINSIC cvec<T, N> tw7i3()
{
return static_cast<T>(0.43388373911755812047576833285) * twiddleimagmask<T, N, inverse>();
}

template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cvec<T, N> a03, cvec<T, N> a04,
@@ -1167,18 +1187,18 @@ KFR_INTRINSIC void butterfly7(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, cv
w00 = a00 + sum1 + sum2 + sum3;

const cvec<T, N> s1 =
w00 + sum1 * tw7r1<T, N, inverse> + sum2 * tw7r2<T, N, inverse> + sum3 * tw7r3<T, N, inverse>;
w00 + sum1 * tw7r1<T, N, inverse>() + sum2 * tw7r2<T, N, inverse>() + sum3 * tw7r3<T, N, inverse>();
const cvec<T, N> s2 =
w00 + sum1 * tw7r2<T, N, inverse> + sum2 * tw7r3<T, N, inverse> + sum3 * tw7r1<T, N, inverse>;
w00 + sum1 * tw7r2<T, N, inverse>() + sum2 * tw7r3<T, N, inverse>() + sum3 * tw7r1<T, N, inverse>();
const cvec<T, N> s3 =
w00 + sum1 * tw7r3<T, N, inverse> + sum2 * tw7r1<T, N, inverse> + sum3 * tw7r2<T, N, inverse>;
w00 + sum1 * tw7r3<T, N, inverse>() + sum2 * tw7r1<T, N, inverse>() + sum3 * tw7r2<T, N, inverse>();

const cvec<T, N> d1 =
dif1 * tw7i1<T, N, inverse> + dif2 * tw7i2<T, N, inverse> + dif3 * tw7i3<T, N, inverse>;
dif1 * tw7i1<T, N, inverse>() + dif2 * tw7i2<T, N, inverse>() + dif3 * tw7i3<T, N, inverse>();
const cvec<T, N> d2 =
dif1 * tw7i2<T, N, inverse> - dif2 * tw7i3<T, N, inverse> - dif3 * tw7i1<T, N, inverse>;
dif1 * tw7i2<T, N, inverse>() - dif2 * tw7i3<T, N, inverse>()- dif3 * tw7i1<T, N, inverse>();
const cvec<T, N> d3 =
dif1 * tw7i3<T, N, inverse> - dif2 * tw7i1<T, N, inverse> + dif3 * tw7i2<T, N, inverse>;
dif1 * tw7i3<T, N, inverse>() - dif2 * tw7i1<T, N, inverse>() + dif3 * tw7i2<T, N, inverse>();

w01 = s1 + d1;
w06 = s1 - d1;
@@ -1294,15 +1314,25 @@ KFR_INTRINSIC void butterfly11(cvec<T, N> a00, cvec<T, N> a01, cvec<T, N> a02, c
}

template <typename T, size_t N, bool inverse>
const static cvec<T, N> tw5r1 = static_cast<T>(0.30901699437494742410229341718 - 1.0);
// Function form of the tw5r1 constant used by butterfly5.
// Returns (0.309016... - 1.0) cast to T, implicitly converted to cvec<T, N>;
// 0.309016... equals cos(2*pi/5). The -1.0 offsets the fact that butterfly5 adds the
// weighted sums to w00, which already contains sum1 + sum2.
// The `inverse` parameter is not used by this helper.
static constexpr KFR_INTRINSIC cvec<T, N> tw5r1()
{
return static_cast<T>(0.30901699437494742410229341718 - 1.0);
}
template <typename T, size_t N, bool inverse>
const static cvec<T, N> tw5i1 =
static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>();
// Function form of the tw5i1 constant used by butterfly5 on the dif1/dif2 terms.
// Returns 0.951056... (numerically sin(2*pi/5)) cast to T and multiplied by
// twiddleimagmask<T, N, inverse>(), so the sign pattern depends on `inverse`.
static constexpr KFR_INTRINSIC cvec<T, N> tw5i1()
{
return static_cast<T>(0.95105651629515357211643933338) * twiddleimagmask<T, N, inverse>();
}
template <typename T, size_t N, bool inverse>
const static cvec<T, N> tw5r2 = static_cast<T>(-0.80901699437494742410229341718 - 1.0);
// Function form of the tw5r2 constant used by butterfly5.
// Returns (-0.809016... - 1.0) cast to T, implicitly converted to cvec<T, N>;
// -0.809016... equals cos(4*pi/5). The -1.0 offsets the sums already folded into w00
// by butterfly5. The `inverse` parameter is not used by this helper.
static constexpr KFR_INTRINSIC cvec<T, N> tw5r2()
{
return static_cast<T>(-0.80901699437494742410229341718 - 1.0);
}
template <typename T, size_t N, bool inverse>
const static cvec<T, N> tw5i2 =
static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>();
// Function form of the tw5i2 constant used by butterfly5 on the dif1/dif2 terms.
// Returns 0.587785... (numerically sin(4*pi/5)) cast to T and multiplied by
// twiddleimagmask<T, N, inverse>(), so the sign pattern depends on `inverse`.
static constexpr KFR_INTRINSIC cvec<T, N> tw5i2()
{
return static_cast<T>(0.58778525229247312916870595464) * twiddleimagmask<T, N, inverse>();
}

template <size_t N, bool inverse = false, typename T>
KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, const cvec<T, N>& a02,
@@ -1315,11 +1345,11 @@ KFR_INTRINSIC void butterfly5(const cvec<T, N>& a00, const cvec<T, N>& a01, cons
const cvec<T, N> dif2 = swap<2>(a02 - a03);
w00 = a00 + sum1 + sum2;

const cvec<T, N> s1 = w00 + sum1 * tw5r1<T, N, inverse> + sum2 * tw5r2<T, N, inverse>;
const cvec<T, N> s2 = w00 + sum1 * tw5r2<T, N, inverse> + sum2 * tw5r1<T, N, inverse>;
const cvec<T, N> s1 = w00 + sum1 * tw5r1<T, N, inverse>() + sum2 * tw5r2<T, N, inverse>();
const cvec<T, N> s2 = w00 + sum1 * tw5r2<T, N, inverse>() + sum2 * tw5r1<T, N, inverse>();

const cvec<T, N> d1 = dif1 * tw5i1<T, N, inverse> + dif2 * tw5i2<T, N, inverse>;
const cvec<T, N> d2 = dif1 * tw5i2<T, N, inverse> - dif2 * tw5i1<T, N, inverse>;
const cvec<T, N> d1 = dif1 * tw5i1<T, N, inverse>() + dif2 * tw5i2<T, N, inverse>();
const cvec<T, N> d2 = dif1 * tw5i2<T, N, inverse>() - dif2 * tw5i1<T, N, inverse>();

w01 = s1 + d1;
w04 = s1 - d1;
@@ -1690,16 +1720,15 @@ template <typename T, bool inverse, typename Tstride = csize_t<1>>
// Entry point for a butterfly whose radix is only known at run time.
// Passes the value list csizes_t<11, 13> and `radix` to cswitch together with two lambdas:
//   - the first receives the selected value as `radix_` and forwards it to
//     spec_generic_butterfly_w<width>;
//   - the second takes no argument and forwards the run-time `radix` to generic_butterfly_w<width>.
// Both use width = vector_width<T> and pass through out, in, twiddle and ostride unchanged.
// The third parameter (an unnamed complex<T>*) is not used in this function.
// NOTE(review): presumably cswitch calls the first lambda only when radix is 11 or 13 and the
// second lambda otherwise - confirm against the cswitch definition.
// NOTE(review): the cswitch statement appears twice in this listing; the two copies differ only
// in line wrapping.
KFR_INTRINSIC void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in,
complex<T>*, const complex<T>* twiddle, Tstride ostride = {})
{
cswitch(
csizes_t<11, 13>(), radix,
[&](auto radix_) CMT_INLINE_LAMBDA {
constexpr size_t width = vector_width<T>;
spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride);
},
[&]() CMT_INLINE_LAMBDA {
constexpr size_t width = vector_width<T>;
generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride);
});
cswitch(csizes_t<11, 13>(), radix,
[&](auto radix_) CMT_INLINE_LAMBDA {
constexpr size_t width = vector_width<T>;
spec_generic_butterfly_w<width>(radix_, cbool_t<inverse>(), out, in, twiddle, ostride);
},
[&]() CMT_INLINE_LAMBDA {
constexpr size_t width = vector_width<T>;
generic_butterfly_w<width>(radix, cbool_t<inverse>(), out, in, twiddle, ostride);
});
}

template <typename T, size_t N>
@@ -253,8 +253,7 @@ if (NOT SKIP_TESTS)
${PROJECT_BINARY_DIR}/bin/all_tests_${A})
endif ()
endforeach ()
else ()
add_test(NAME all_tests COMMAND ${EMULATOR}
${PROJECT_BINARY_DIR}/bin/all_tests)
endif ()
add_test(NAME all_tests COMMAND ${SDE} ${SDE_ARCH_${CPU_ARCH}} -chip_check_exe_only
-- ${PROJECT_BINARY_DIR}/bin/all_tests)
endif ()

0 comments on commit 25eceff

Please sign in to comment.
You can’t perform that action at this time.