-
Notifications
You must be signed in to change notification settings - Fork 0
Xsimd #12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Xsimd #12
Changes from all commits
0b9550a
b6eedca
231d175
be5b0fd
e27037d
39856a3
7aa0563
d663683
8b9124b
17521ea
c3adc2a
c21776f
7ff99d0
e7ed800
25538a0
87d7565
f91233a
4b50684
2c56305
7efe21b
84695b9
c3d5882
d789021
01754af
1b8c62c
9ebf1ed
2f757bb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,7 +21,17 @@ jobs: | |
| - uses: actions/checkout@v3 | ||
|
|
||
| - name: Install APT packages | ||
| run: sudo apt install clang libboost-exception-dev libbenchmark-dev -y | ||
| run: | | ||
| sudo apt-get update | ||
| sudo apt install libboost-exception-dev libbenchmark-dev -y | ||
|
|
||
| - name: Install LLVM and Clang 18 | ||
| run: | | ||
| sudo apt-get update | ||
| sudo apt-get install -y wget gnupg lsb-release | ||
| wget https://apt.llvm.org/llvm.sh | ||
| chmod +x llvm.sh | ||
| sudo ./llvm.sh 18 | ||
|
|
||
| - name: Install Blaze | ||
| run: | | ||
|
|
@@ -36,6 +46,13 @@ jobs: | |
| -DCMAKE_INSTALL_PREFIX=/usr/local/ . \ | ||
| && sudo make install | ||
|
|
||
| - name: Install xsimd | ||
| run: | | ||
| git clone https://github.com/xtensor-stack/xsimd.git | ||
| cd xsimd | ||
| cmake -B build -DCMAKE_INSTALL_PREFIX=/usr/local/ . | ||
| sudo cmake --build build --target install | ||
|
|
||
| - name: Install GTest | ||
| run: | | ||
| git clone https://github.com/google/googletest.git | ||
|
|
@@ -47,7 +64,7 @@ jobs: | |
| run: | | ||
| cmake -B ${{github.workspace}}/build \ | ||
| -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} \ | ||
| -DCMAKE_CXX_COMPILER=clang++ \ | ||
| -DCMAKE_CXX_COMPILER=clang++-18 \ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also here
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Answered above |
||
| -DBLAST_WITH_BENCHMARK=ON \ | ||
| -DBLAST_WITH_TEST=ON | ||
|
|
||
|
|
@@ -60,4 +77,3 @@ jobs: | |
| # Execute tests defined by the CMake configuration. | ||
| # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail | ||
| run: ctest -C ${{env.BUILD_TYPE}} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,7 +56,7 @@ ENV PKG_CONFIG_PATH=/usr/local/lib | |
| RUN mkdir -p blast/build && cd blast/build \ | ||
| && cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \ | ||
| -DCMAKE_CXX_COMPILER="clang++-15" \ | ||
| -DCMAKE_CXX_FLAGS="-march=native -mfma -mavx -mavx2 -msse4 -fno-math-errno" \ | ||
| -DCMAKE_CXX_FLAGS="-march=native -mfma -mavx -mavx2 -msse4 -fno-math-errno -DXSIMD_DEFAULT_ARCH='fma3<avx2>'" \ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be nice to detect this automatically (but that is maybe out of scope of this pull request)
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe xsimd does automatically deduce the best possible architecture if you don't specify one. For now I made it explicit because currently we only support avx2. |
||
| -DCMAKE_CXX_FLAGS_RELEASE="-O3 -g -DNDEBUG -ffast-math" .. \ | ||
| -DBLAST_WITH_TEST=ON \ | ||
| -DBLAST_WITH_BENCHMARK=ON \ | ||
|
|
@@ -79,4 +79,3 @@ CMD mkdir -p blast/bench_result/data \ | |
| && mkdir -p blast/bench_result/image \ | ||
| && cd blast \ | ||
| && make -j 1 bench_result/image/dgemm_performance.png bench_result/image/dgemm_performance_ratio.png | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| // Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. | ||
| // Use of this source code is governed by a BSD-style | ||
| // license that can be found in the LICENSE file. | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <blast/math/simd/SimdVec.hpp> | ||
| #include <blast/math/simd/SimdMask.hpp> | ||
| #include <blast/math/simd/SimdIndex.hpp> | ||
| #include <blast/math/simd/SimdSize.hpp> | ||
| #include <blast/math/simd/IsSimdAligned.hpp> | ||
| #include <blast/math/simd/RegisterCapacity.hpp> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,24 +8,21 @@ | |
| #include <blast/math/TransposeFlag.hpp> | ||
| #include <blast/math/StorageOrder.hpp> | ||
| #include <blast/math/TypeTraits.hpp> | ||
| #include <blast/math/simd/Simd.hpp> | ||
| #include <blast/math/simd/IsSimdAligned.hpp> | ||
| #include <blast/math/Simd.hpp> | ||
| #include <blast/util/Assert.hpp> | ||
|
|
||
| #include <blaze/util/Exception.h> | ||
|
|
||
|
|
||
|
|
||
| namespace blast | ||
| { | ||
| template <typename T, bool SO, bool AF, bool PF> | ||
| class DynamicMatrixPointer | ||
| { | ||
| public: | ||
| using ElementType = T; | ||
| using IntrinsicType = typename Simd<std::remove_cv_t<T>>::IntrinsicType; | ||
| using MaskType = typename Simd<std::remove_cv_t<T>>::MaskType; | ||
| using SimdVecType = SimdVec<std::remove_cv_t<T>>; | ||
| using IntrinsicType = SimdVecType::IntrinsicType; | ||
| using MaskType = SimdMask<std::remove_cv_t<T>>; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just a question: I guess you remove
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, without it I used to run into compiler errors. We can try removing it and see what happens. |
||
|
|
||
| static bool constexpr storageOrder = SO; | ||
| static bool constexpr aligned = AF; | ||
|
|
@@ -83,9 +80,9 @@ namespace blast | |
| } | ||
|
|
||
|
|
||
| IntrinsicType broadcast() const noexcept | ||
| SimdVecType broadcast() const noexcept | ||
| { | ||
| return blast::broadcast<SS>(ptr_); | ||
| return *ptr_; | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -187,7 +184,7 @@ namespace blast | |
|
|
||
|
|
||
| private: | ||
| static size_t constexpr SS = Simd<std::remove_cv_t<T>>::size; | ||
| static size_t constexpr SS = SimdVecType::size(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would consider making this a function instead.
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what exeactly? making |
||
| static TransposeFlag constexpr majorOrientation = SO == columnMajor ? columnVector : rowVector; | ||
|
|
||
|
|
||
|
|
@@ -253,4 +250,4 @@ namespace blast | |
| { | ||
| return {p, spacing}; | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,8 +4,7 @@ | |
|
|
||
| #pragma once | ||
|
|
||
|
|
||
| #include <blast/math/simd/Simd.hpp> | ||
| #include <blast/math/Simd.hpp> | ||
| #include <blast/util/Assert.hpp> | ||
|
|
||
| #include <type_traits> | ||
|
|
@@ -18,9 +17,9 @@ namespace blast | |
| { | ||
| public: | ||
| using ElementType = T; | ||
| using IntrinsicType = typename Simd<std::remove_cv_t<T>>::IntrinsicType; | ||
| using MaskType = typename Simd<std::remove_cv_t<T>>::MaskType; | ||
| using SimdVecType = SimdVec<std::remove_cv_t<T>>; | ||
| using IntrinsicType = SimdVecType::IntrinsicType; | ||
| using MaskType = SimdMask<std::remove_cv_t<T>>; | ||
|
|
||
| static bool constexpr transposeFlag = TF; | ||
| static bool constexpr aligned = AF; | ||
|
|
@@ -40,7 +39,7 @@ namespace blast | |
| , spacing_ {spacing} | ||
| { | ||
| BLAST_USER_ASSERT(spacing > 0, "Vector element spacing must be positive."); | ||
| BLAST_USER_ASSERT(!AF || reinterpret_cast<ptrdiff_t>(ptr) % (SS * sizeof(T)) == 0, "Pointer is not aligned"); | ||
| BLAST_USER_ASSERT(!AF || isSimdAligned(ptr), "Pointer is not aligned"); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! self-documenting code |
||
| } | ||
|
|
||
|
|
||
|
|
@@ -51,6 +50,7 @@ namespace blast | |
| SimdVecType load() const noexcept | ||
| { | ||
| // Non-optimized | ||
| // TODO: use gather() | ||
| IntrinsicType v; | ||
| for (size_t i = 0; i < SS; ++i) | ||
| v[i] = ptr_[spacing_ * i]; | ||
|
|
@@ -62,18 +62,18 @@ namespace blast | |
| SimdVecType load(MaskType mask) const noexcept | ||
| { | ||
| // Non-optimized | ||
| IntrinsicType v = blast::setzero<std::remove_cv_t<ElementType>, SS>(); | ||
| // TODO: use gather() | ||
| T v[SS]; | ||
| for (size_t i = 0; i < SS; ++i) | ||
| if (mask[i]) | ||
| v[i] = ptr_[spacing_ * i]; | ||
| v[i] = mask[i] ? ptr_[spacing_ * i] : T {}; | ||
|
|
||
| return SimdVecType {v}; | ||
| return SimdVecType {v, false}; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was surprising to me. What is the
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It specifies whether |
||
| } | ||
|
|
||
|
|
||
| IntrinsicType broadcast() const noexcept | ||
| SimdVecType broadcast() const noexcept | ||
| { | ||
| return blast::broadcast<SS>(ptr_); | ||
| return *ptr_; | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -172,7 +172,7 @@ namespace blast | |
|
|
||
|
|
||
| private: | ||
| static size_t constexpr SS = Simd<std::remove_cv_t<T>>::size; | ||
| static size_t constexpr SS = SimdVecType::size(); | ||
|
|
||
|
|
||
| T * ptrOffset(ptrdiff_t i) const noexcept | ||
|
|
@@ -191,4 +191,4 @@ namespace blast | |
| { | ||
| return p.trans(); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any way to make this version indepedent?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should be not a problem. But the CI system has clang-14, and on my system the oldest clang I can install with
aptis clang-15. Compiling with older clang requires adding thetypenamekeyword here and there. Also the code builds with gcc now, but I am not sure about older versions.I suggest that we keep it in mind and come back to the compiler support issue later.