Skip to content

Commit

Permalink
Add 'feature' tests to InvokeSimd (intel#1443)
Browse files Browse the repository at this point in the history
  • Loading branch information
fveselov authored and bb-sycl committed Feb 27, 2023
1 parent 5935b4f commit a424e68
Show file tree
Hide file tree
Showing 16 changed files with 1,248 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../SPMD_invoke_ESIMD_external.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/SPMD_invoke_ESIMD_external.cpp,
* but compiles without optional subgroup attribute specified and intended to
* check that compiler is able to choose subgroup size correctly.
*/
16 changes: 16 additions & 0 deletions SYCL/InvokeSimd/feature/IMPL_SUBGROUP/invoke_simd_struct.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../invoke_simd_struct.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/invoke_simd_struct.cpp, but
* compiles without optional subgroup attribute specified and intended to check
* that compiler is able to choose subgroup size correctly.
*/
19 changes: 19 additions & 0 deletions SYCL/InvokeSimd/feature/IMPL_SUBGROUP/popcnt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable after simd_mask supported
// XFAIL: gpu
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../popcnt.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/popcnt.cpp, but compiles without
* optional subgroup attribute specified and intended to check that compiler is
* able to choose subgroup size correctly.
*/
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip

// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../../invoke_simd_conv.cpp -o %t.out
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../popcnt_emu.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/spec/invoke_simd_conv.cpp, but compiles
* This tests is the same as InvokeSimd/feature/popcnt_emu.cpp, but compiles
* without optional subgroup attribute specified and intended to check that
* compiler is able to choose subgroup size correctly.
*/
16 changes: 16 additions & 0 deletions SYCL/InvokeSimd/feature/IMPL_SUBGROUP/scale.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../scale.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/scale.cpp, but compiles without
* optional subgroup attribute specified and intended to check that compiler is
* able to choose subgroup size correctly.
*/
16 changes: 16 additions & 0 deletions SYCL/InvokeSimd/feature/IMPL_SUBGROUP/void_retval.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// Check that full compilation works:
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../void_retval.cpp -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This tests is the same as InvokeSimd/feature/void_retval.cpp, but compiles
* without optional subgroup attribute specified and intended to check that
* compiler is able to choose subgroup size correctly.
*/
150 changes: 150 additions & 0 deletions SYCL/InvokeSimd/feature/SPMD_invoke_ESIMD_external.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// TODO: enable on Windows once driver is ready
// REQUIRES: gpu && linux
// UNSUPPORTED: cuda || hip
//
// TODO: enable when Jira ticket resolved
// XFAIL: gpu
//
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out
//
// VISALTO enable run
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out

/*
* This test checks the case of calling the same external function from the SPMD
* and ESIMD kernels.
*/

#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
#include <sycl/ext/oneapi/experimental/uniform.hpp>
#include <sycl/sycl.hpp>

#include <functional>
#include <iostream>
#include <type_traits>

/* Subgroup size attribute is optional
* In case it is absent compiler decides what subgroup size to use
*/
#ifdef IMPL_SUBGROUP
#define SUBGROUP_ATTR
#else
#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]]
#endif

using namespace sycl::ext::oneapi::experimental;
namespace esimd = sycl::ext::intel::esimd;
constexpr int VL = 16;

esimd::simd<float, VL> ESIMD_CALLEE(float *A, int i) SYCL_ESIMD_FUNCTION {
esimd::simd<float, VL> res;
res.copy_from(A + i);
return res;
}

[[intel::device_indirectly_callable]] SYCL_EXTERNAL
simd<float, VL> __regcall SIMD_CALLEE(float *A, int i) SYCL_ESIMD_FUNCTION {
esimd::simd<float, VL> res = ESIMD_CALLEE(A, i);
return res;
}

using namespace sycl;

int main() {
constexpr unsigned Size = 1024;
constexpr unsigned GroupSize = 4 * VL;

auto q = queue{gpu_selector_v};
auto dev = q.get_device();
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
<< "\n";

float *A = static_cast<float *>(malloc_shared(Size * sizeof(float), q));
float *B = static_cast<float *>(malloc_shared(Size * sizeof(float), q));
float *C = static_cast<float *>(malloc_shared(Size * sizeof(float), q));

for (unsigned i = 0; i < Size; ++i) {
A[i] = i;
B[i] = C[i] = -1;
}

try {
sycl::range<1> GlobalRange{Size};
// Number of workitems in each workgroup.
sycl::range<1> LocalRange{GroupSize};
sycl::nd_range<1> Range(GlobalRange, LocalRange);

auto e = q.submit([&](handler &cgh) {
cgh.parallel_for<class TestInvokeSimd>(
Range, [=](nd_item<1> ndi) SUBGROUP_ATTR {
sub_group sg = ndi.get_sub_group();
group<1> g = ndi.get_group();
uint32_t i = sg.get_group_linear_id() * VL +
g.get_group_linear_id() * GroupSize;
uint32_t wi_id = i + sg.get_local_id();

float res = invoke_simd(sg, SIMD_CALLEE, uniform{A}, uniform{i});
B[wi_id] = res;
});
});
e.wait();
} catch (sycl::exception const &e) {
sycl::free(A, q);
sycl::free(B, q);
sycl::free(C, q);

std::cout << "SYCL exception caught: " << e.what() << '\n';
return e.code().value();
}

try {
sycl::range<1> GlobalRange{Size};
// Number of workitems in each workgroup.
sycl::range<1> LocalRange{VL};
sycl::nd_range<1> Range(GlobalRange, LocalRange);

auto e = q.submit([&](handler &cgh) {
cgh.parallel_for<class TestExternalCall>(
Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL {
uint32_t i = ndi.get_group(0) * VL;

esimd::simd<float, VL> res(SIMD_CALLEE(B, i));
res.copy_to(C + i);
});
});
e.wait();
} catch (sycl::exception const &e) {
sycl::free(A, q);
sycl::free(B, q);
sycl::free(C, q);

std::cout << "SYCL exception caught: " << e.what() << '\n';
return e.code().value();
}

int err_cnt = 0;

for (unsigned i = 0; i < Size; ++i) {
if (A[i] != B[i] || B[i] != C[i]) {
if (++err_cnt < 10) {
std::cout << "failed at index " << i << ", " << A[i] << " != " << B[i]
<< " != " << C[i] << "\n";
}
}
}

sycl::free(A, q);
sycl::free(B, q);
sycl::free(C, q);

if (err_cnt > 0) {
std::cout << " pass rate: "
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
<< (Size - err_cnt) << "/" << Size << ")\n";
}

std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
return err_cnt;
}

0 comments on commit a424e68

Please sign in to comment.