forked from intel/llvm-test-suite
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add 'feature' tests to InvokeSimd (intel#1443)
- Loading branch information
Showing
16 changed files
with
1,248 additions
and
5 deletions.
There are no files selected for viewing
18 changes: 18 additions & 0 deletions
18
SYCL/InvokeSimd/feature/IMPL_SUBGROUP/SPMD_invoke_ESIMD_external.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// TODO: enable on Windows once driver is ready | ||
// REQUIRES: gpu && linux | ||
// UNSUPPORTED: cuda || hip | ||
// | ||
// TODO: enable when Jira ticket resolved | ||
// XFAIL: gpu | ||
// | ||
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../SPMD_invoke_ESIMD_external.cpp -o %t.out | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
// | ||
// VISALTO enable run | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
|
||
/* | ||
* This tests is the same as InvokeSimd/feature/SPMD_invoke_ESIMD_external.cpp, | ||
* but compiles without optional subgroup attribute specified and intended to | ||
* check that compiler is able to choose subgroup size correctly. | ||
*/ |
16 changes: 16 additions & 0 deletions
16
SYCL/InvokeSimd/feature/IMPL_SUBGROUP/invoke_simd_struct.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// TODO: enable on Windows once driver is ready | ||
// REQUIRES: gpu && linux | ||
// UNSUPPORTED: cuda || hip | ||
// | ||
// Check that full compilation works: | ||
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../invoke_simd_struct.cpp -o %t.out | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
// | ||
// VISALTO enable run | ||
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
|
||
/* | ||
* This tests is the same as InvokeSimd/feature/invoke_simd_struct.cpp, but | ||
* compiles without optional subgroup attribute specified and intended to check | ||
* that compiler is able to choose subgroup size correctly. | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
// TODO: enable on Windows once driver is ready | ||
// REQUIRES: gpu && linux | ||
// UNSUPPORTED: cuda || hip | ||
// | ||
// TODO: enable after simd_mask supported | ||
// XFAIL: gpu | ||
// | ||
// Check that full compilation works: | ||
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../popcnt.cpp -o %t.out | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
// | ||
// VISALTO enable run | ||
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
|
||
/* | ||
* This tests is the same as InvokeSimd/feature/popcnt.cpp, but compiles without | ||
* optional subgroup attribute specified and intended to check that compiler is | ||
* able to choose subgroup size correctly. | ||
*/ |
7 changes: 4 additions & 3 deletions
7
...d/spec/IMPL_SUBGROUP/invoke_simd_conv.cpp → ...Simd/feature/IMPL_SUBGROUP/popcnt_emu.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,16 @@ | ||
// TODO: enable on Windows once driver is ready | ||
// REQUIRES: gpu && linux | ||
// UNSUPPORTED: cuda || hip | ||
|
||
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../../invoke_simd_conv.cpp -o %t.out | ||
// | ||
// Check that full compilation works: | ||
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../popcnt_emu.cpp -o %t.out | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
// | ||
// VISALTO enable run | ||
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
|
||
/* | ||
* This tests is the same as InvokeSimd/spec/invoke_simd_conv.cpp, but compiles | ||
* This tests is the same as InvokeSimd/feature/popcnt_emu.cpp, but compiles | ||
* without optional subgroup attribute specified and intended to check that | ||
* compiler is able to choose subgroup size correctly. | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// TODO: enable on Windows once driver is ready | ||
// REQUIRES: gpu && linux | ||
// UNSUPPORTED: cuda || hip | ||
// | ||
// Check that full compilation works: | ||
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../scale.cpp -o %t.out | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
// | ||
// VISALTO enable run | ||
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
|
||
/* | ||
* This tests is the same as InvokeSimd/feature/scale.cpp, but compiles without | ||
* optional subgroup attribute specified and intended to check that compiler is | ||
* able to choose subgroup size correctly. | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// TODO: enable on Windows once driver is ready | ||
// REQUIRES: gpu && linux | ||
// UNSUPPORTED: cuda || hip | ||
// | ||
// Check that full compilation works: | ||
// RUN: %clangxx -DIMPL_SUBGROUP -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %S/../void_retval.cpp -o %t.out | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
// | ||
// VISALTO enable run | ||
// RUN: env IGC_VISALTO=63 IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
|
||
/* | ||
* This tests is the same as InvokeSimd/feature/void_retval.cpp, but compiles | ||
* without optional subgroup attribute specified and intended to check that | ||
* compiler is able to choose subgroup size correctly. | ||
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
// TODO: enable on Windows once driver is ready | ||
// REQUIRES: gpu && linux | ||
// UNSUPPORTED: cuda || hip | ||
// | ||
// TODO: enable when Jira ticket resolved | ||
// XFAIL: gpu | ||
// | ||
// RUN: %clangxx -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr %s -o %t.out | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
// | ||
// VISALTO enable run | ||
// RUN: env IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 %GPU_RUN_PLACEHOLDER %t.out | ||
|
||
/* | ||
* This test checks the case of calling the same external function from the SPMD | ||
* and ESIMD kernels. | ||
*/ | ||
|
||
#include <sycl/ext/intel/esimd.hpp> | ||
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp> | ||
#include <sycl/ext/oneapi/experimental/uniform.hpp> | ||
#include <sycl/sycl.hpp> | ||
|
||
#include <functional> | ||
#include <iostream> | ||
#include <type_traits> | ||
|
||
/* Subgroup size attribute is optional | ||
* In case it is absent compiler decides what subgroup size to use | ||
*/ | ||
#ifdef IMPL_SUBGROUP | ||
#define SUBGROUP_ATTR | ||
#else | ||
#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] | ||
#endif | ||
|
||
using namespace sycl::ext::oneapi::experimental; | ||
namespace esimd = sycl::ext::intel::esimd; | ||
constexpr int VL = 16; | ||
|
||
esimd::simd<float, VL> ESIMD_CALLEE(float *A, int i) SYCL_ESIMD_FUNCTION { | ||
esimd::simd<float, VL> res; | ||
res.copy_from(A + i); | ||
return res; | ||
} | ||
|
||
[[intel::device_indirectly_callable]] SYCL_EXTERNAL | ||
simd<float, VL> __regcall SIMD_CALLEE(float *A, int i) SYCL_ESIMD_FUNCTION { | ||
esimd::simd<float, VL> res = ESIMD_CALLEE(A, i); | ||
return res; | ||
} | ||
|
||
using namespace sycl; | ||
|
||
int main() { | ||
constexpr unsigned Size = 1024; | ||
constexpr unsigned GroupSize = 4 * VL; | ||
|
||
auto q = queue{gpu_selector_v}; | ||
auto dev = q.get_device(); | ||
std::cout << "Running on " << dev.get_info<sycl::info::device::name>() | ||
<< "\n"; | ||
|
||
float *A = static_cast<float *>(malloc_shared(Size * sizeof(float), q)); | ||
float *B = static_cast<float *>(malloc_shared(Size * sizeof(float), q)); | ||
float *C = static_cast<float *>(malloc_shared(Size * sizeof(float), q)); | ||
|
||
for (unsigned i = 0; i < Size; ++i) { | ||
A[i] = i; | ||
B[i] = C[i] = -1; | ||
} | ||
|
||
try { | ||
sycl::range<1> GlobalRange{Size}; | ||
// Number of workitems in each workgroup. | ||
sycl::range<1> LocalRange{GroupSize}; | ||
sycl::nd_range<1> Range(GlobalRange, LocalRange); | ||
|
||
auto e = q.submit([&](handler &cgh) { | ||
cgh.parallel_for<class TestInvokeSimd>( | ||
Range, [=](nd_item<1> ndi) SUBGROUP_ATTR { | ||
sub_group sg = ndi.get_sub_group(); | ||
group<1> g = ndi.get_group(); | ||
uint32_t i = sg.get_group_linear_id() * VL + | ||
g.get_group_linear_id() * GroupSize; | ||
uint32_t wi_id = i + sg.get_local_id(); | ||
|
||
float res = invoke_simd(sg, SIMD_CALLEE, uniform{A}, uniform{i}); | ||
B[wi_id] = res; | ||
}); | ||
}); | ||
e.wait(); | ||
} catch (sycl::exception const &e) { | ||
sycl::free(A, q); | ||
sycl::free(B, q); | ||
sycl::free(C, q); | ||
|
||
std::cout << "SYCL exception caught: " << e.what() << '\n'; | ||
return e.code().value(); | ||
} | ||
|
||
try { | ||
sycl::range<1> GlobalRange{Size}; | ||
// Number of workitems in each workgroup. | ||
sycl::range<1> LocalRange{VL}; | ||
sycl::nd_range<1> Range(GlobalRange, LocalRange); | ||
|
||
auto e = q.submit([&](handler &cgh) { | ||
cgh.parallel_for<class TestExternalCall>( | ||
Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL { | ||
uint32_t i = ndi.get_group(0) * VL; | ||
|
||
esimd::simd<float, VL> res(SIMD_CALLEE(B, i)); | ||
res.copy_to(C + i); | ||
}); | ||
}); | ||
e.wait(); | ||
} catch (sycl::exception const &e) { | ||
sycl::free(A, q); | ||
sycl::free(B, q); | ||
sycl::free(C, q); | ||
|
||
std::cout << "SYCL exception caught: " << e.what() << '\n'; | ||
return e.code().value(); | ||
} | ||
|
||
int err_cnt = 0; | ||
|
||
for (unsigned i = 0; i < Size; ++i) { | ||
if (A[i] != B[i] || B[i] != C[i]) { | ||
if (++err_cnt < 10) { | ||
std::cout << "failed at index " << i << ", " << A[i] << " != " << B[i] | ||
<< " != " << C[i] << "\n"; | ||
} | ||
} | ||
} | ||
|
||
sycl::free(A, q); | ||
sycl::free(B, q); | ||
sycl::free(C, q); | ||
|
||
if (err_cnt > 0) { | ||
std::cout << " pass rate: " | ||
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% (" | ||
<< (Size - err_cnt) << "/" << Size << ")\n"; | ||
} | ||
|
||
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n"); | ||
return err_cnt; | ||
} |
Oops, something went wrong.