Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix build issues on SPR and avx512_qsort float16 #25376

Merged
merged 8 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 57 additions & 13 deletions .github/workflows/linux_simd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ jobs:
- uses: ./.github/meson_actions
name: Build/Test

intel_sde:
intel_sde_avx512:
needs: [baseline_only]
runs-on: ubuntu-latest
steps:
Expand All @@ -162,7 +162,7 @@ jobs:

- name: Install Intel SDE
run: |
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/784319/sde-external-9.24.0-2023-07-13-lin.tar.xz
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde

Expand Down Expand Up @@ -198,15 +198,59 @@ jobs:
sde -spr -- python -c "import numpy; numpy.show_config()" &&
sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_simd*
seiko2plus marked this conversation as resolved.
Show resolved Hide resolved

# Can't run on SDE just yet: see https://github.com/numpy/numpy/issues/23545#issuecomment-1659047365
#
#- name: linalg/ufunc/umath tests (SPR)
# run: |
# export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/)
# export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE"
# cd build-install &&
# sde -spr -- python -c "import numpy; numpy.show_config()" &&
# sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \
# $NUMPY_SITE/numpy/_core/tests/test_ufunc.py \
# $NUMPY_SITE/numpy/linalg/tests/test_*
- name: linalg/ufunc/umath tests (TGL)
run: |
export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/)
export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE"
cd build-install &&
sde -tgl -- python -c "import numpy; numpy.show_config()" &&
sde -tgl -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \
$NUMPY_SITE/numpy/_core/tests/test_ufunc.py \
$NUMPY_SITE/numpy/_core/tests/test_multiarray.py \
$NUMPY_SITE/numpy/linalg/tests/test_*


intel_sde_spr:
needs: [baseline_only]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: recursive
fetch-depth: 0
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
python-version: '3.11'

- name: Install Intel SDE
run: |
curl -o /tmp/sde.tar.xz https://downloadmirror.intel.com/788820/sde-external-9.27.0-2023-09-13-lin.tar.xz
mkdir /tmp/sde && tar -xvf /tmp/sde.tar.xz -C /tmp/sde/
sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde

- name: Install dependencies
run: |
sudo apt update
sudo apt install -y g++-13
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 1
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 1
python -m pip install -r build_requirements.txt
python -m pip install pytest pytest-xdist hypothesis typing_extensions

- name: Build
run: spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr

- name: Meson Log
if: always()
run: cat build/meson-logs/meson-log.txt

- name: linalg/ufunc/umath tests on Intel SPR
run: |
export NUMPY_SITE=$(realpath build-install/usr/lib/python*/site-packages/)
export PYTHONPATH="$PYTHONPATH:$NUMPY_SITE"
cd build-install &&
sde -spr -- python -c "import numpy; numpy.show_config()" &&
sde -spr -- python -m pytest $NUMPY_SITE/numpy/_core/tests/test_umath* \
$NUMPY_SITE/numpy/_core/tests/test_ufunc.py \
$NUMPY_SITE/numpy/_core/tests/test_multiarray.py \
$NUMPY_SITE/numpy/linalg/tests/test_*
43 changes: 1 addition & 42 deletions numpy/_core/src/npysort/x86_simd_qsort_16bit.dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,9 @@

#if defined(NPY_HAVE_AVX512_SPR)
#include "x86-simd-sort/src/avx512fp16-16bit-qsort.hpp"
/*
* Wrapper function declarations to avoid multiple definitions of
* avx512_qsort<uint16_t> and avx512_qsort<int16_t>
*/
void avx512_qsort_uint16(uint16_t*, npy_intp);
void avx512_qsort_int16(int16_t*, npy_intp);
void avx512_qselect_uint16(uint16_t*, npy_intp, npy_intp);
void avx512_qselect_int16(int16_t*, npy_intp, npy_intp);

#include "x86-simd-sort/src/avx512-16bit-qsort.hpp"
#elif defined(NPY_HAVE_AVX512_ICL)
#include "x86-simd-sort/src/avx512-16bit-qsort.hpp"
/* Wrapper function definitions here: */
/*
 * Wrapper for sorting a uint16_t buffer: forwards arr and size unchanged
 * to avx512_qsort.
 */
void avx512_qsort_uint16(uint16_t* arr, npy_intp size)
{
avx512_qsort(arr, size);
}
/*
 * Wrapper for sorting an int16_t buffer: forwards arr and size unchanged
 * to avx512_qsort.
 */
void avx512_qsort_int16(int16_t* arr, npy_intp size)
{
avx512_qsort(arr, size);
}
/*
 * Wrapper for selection on a uint16_t buffer: forwards to avx512_qselect
 * with kth ahead of size, plus a literal `true` as the fourth argument.
 * NOTE(review): the meaning of that flag is not visible here -- confirm
 * against the avx512_qselect declaration in x86-simd-sort.
 */
void avx512_qselect_uint16(uint16_t* arr, npy_intp kth, npy_intp size)
{
avx512_qselect(arr, kth, size, true);
}
/*
 * Wrapper for selection on an int16_t buffer: forwards to avx512_qselect
 * with kth ahead of size, plus a literal `true` as the fourth argument.
 * NOTE(review): the meaning of that flag is not visible here -- confirm
 * against the avx512_qselect declaration in x86-simd-sort.
 */
void avx512_qselect_int16(int16_t* arr, npy_intp kth, npy_intp size)
{
avx512_qselect(arr, kth, size, true);
}
#endif

namespace np { namespace qsort_simd {
Expand All @@ -50,20 +25,12 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(Half *arr, npy_intp num, npy_int

/*
 * QSelect specialization for uint16_t.
 * Parameters arrive as (arr, num, kth), but both callees below are invoked
 * with kth ahead of num.
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(uint16_t *arr, npy_intp num, npy_intp kth)
{
#if defined(NPY_HAVE_AVX512_SPR)
/* With NPY_HAVE_AVX512_SPR defined, call the named wrapper instead of avx512_qselect. */
avx512_qselect_uint16(arr, kth, num);
#else
avx512_qselect(arr, kth, num);
#endif
}

/*
 * QSelect specialization for int16_t.
 * Parameters arrive as (arr, num, kth), but both callees below are invoked
 * with kth ahead of num.
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int16_t *arr, npy_intp num, npy_intp kth)
{
#if defined(NPY_HAVE_AVX512_SPR)
/* With NPY_HAVE_AVX512_SPR defined, call the named wrapper instead of avx512_qselect. */
avx512_qselect_int16(arr, kth, num);
#else
avx512_qselect(arr, kth, num);
#endif
}

/*
Expand All @@ -79,19 +46,11 @@ template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size)
}
/*
 * QSort specialization for uint16_t: passes arr and size straight through
 * to the sort routine selected by the preprocessor branch below.
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, npy_intp size)
{
#if defined(NPY_HAVE_AVX512_SPR)
/* With NPY_HAVE_AVX512_SPR defined, call the named wrapper instead of avx512_qsort. */
avx512_qsort_uint16(arr, size);
#else
avx512_qsort(arr, size);
#endif
}
/*
 * QSort specialization for int16_t: passes arr and size straight through
 * to the sort routine selected by the preprocessor branch below.
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, npy_intp size)
{
#if defined(NPY_HAVE_AVX512_SPR)
/* With NPY_HAVE_AVX512_SPR defined, call the named wrapper instead of avx512_qsort. */
avx512_qsort_int16(arr, size);
#else
avx512_qsort(arr, size);
#endif
}
#endif // NPY_HAVE_AVX512_ICL || SPR

Expand Down
Loading