Skip to content

Commit

Permalink
[KT] Skip all test cases with an error "LLVM ERROR: SLM size exceeds …
Browse files Browse the repository at this point in the history
…target limits" (#1046)
  • Loading branch information
SergeyKopienko committed Aug 10, 2023
1 parent 50878b0 commit 6afed28
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 26 deletions.
9 changes: 7 additions & 2 deletions include/oneapi/dpl/experimental/kt/esimd_radix_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ template <bool _IsAscending, std::uint8_t _RadixBits, typename _KernelParam, typ
sycl::event
radix_sort(sycl::queue __q, _Range&& __rng, _KernelParam __param)
{
static_assert(_RadixBits == 8);

static_assert(32 <= __param.data_per_workitem && __param.data_per_workitem <= 512 &&
__param.data_per_workitem % 32 == 0);

const ::std::size_t __n = __rng.size();
assert(__n > 1);

Expand All @@ -42,14 +47,14 @@ radix_sort(sycl::queue __q, _Range&& __rng, _KernelParam __param)
constexpr ::std::uint32_t __one_wg_cap = __data_per_workitem * __workgroup_size;
if (__n <= __one_wg_cap)
{
// TODO: support different RadixBits values (only 7 or 8 are currently supported), WorkGroupSize and DataPerWorkItem
// TODO: support different RadixBits values (only 7 or 8 are currently supported), WorkGroupSize
return one_wg<_KernelName, _IsAscending, _RadixBits, __data_per_workitem, __workgroup_size>(
__q, ::std::forward<_Range>(__rng), __n);
}
else
{
// TODO: avoid kernel duplication (generate the output storage with the same type as input storage and use swap)
// TODO: support different RadixBits, WorkGroupSize and DataPerWorkItem
// TODO: support different RadixBits, WorkGroupSize
return onesweep<_KernelName, _IsAscending, _RadixBits, __data_per_workitem, __workgroup_size>(
__q, ::std::forward<_Range>(__rng), __n);
}
Expand Down
66 changes: 42 additions & 24 deletions test/kt/esimd_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ constexpr bool Descending = false;
#define TEST_WORK_GROUP_SIZE 64
#endif

constexpr std::uint8_t RadixBits = 8;

using ParamType = oneapi::dpl::experimental::kt::kernel_param<TEST_DATA_PER_WORK_ITEM, TEST_WORK_GROUP_SIZE>;
constexpr ParamType kernel_parameters;

Expand Down Expand Up @@ -386,38 +388,54 @@ void test_general_cases(std::size_t size, KernelParam param)
}
#endif // TEST_DPCPP_BACKEND_PRESENT

template <typename T, typename KernelParam>
bool
can_run_test(KernelParam param)
{
sycl::queue q = TestUtils::get_test_queue();

const ::std::size_t __max_slm_size = q.get_device().template get_info<sycl::info::device::local_mem_size>();

// skip tests with error: LLVM ERROR: SLM size exceeds target limits
return sizeof(T) * param.data_per_workitem * param.workgroup_size < __max_slm_size;
}

int main()
{
#if TEST_DPCPP_BACKEND_PRESENT
const std::vector<std::size_t> sizes = {
1, 6, 16, 43, 256, 316, 2048, 5072, 8192, 14001, 1<<14,
(1<<14)+1, 50000, 67543, 100'000, 1<<17, 179'581, 250'000, 1<<18,
(1<<18)+1, 500'000, 888'235, 1'000'000, 1<<20, 10'000'000
};

try
const bool _can_run_test = can_run_test<TEST_DATA_TYPE>(kernel_parameters);
if (_can_run_test)
{
#if TEST_LONG_RUN
for(auto size: sizes)
const std::vector<std::size_t> sizes = {
1, 6, 16, 43, 256, 316, 2048, 5072, 8192, 14001, 1<<14,
(1<<14)+1, 50000, 67543, 100'000, 1<<17, 179'581, 250'000, 1<<18,
(1<<18)+1, 500'000, 888'235, 1'000'000, 1<<20, 10'000'000
};

try
{
test_general_cases<TEST_DATA_TYPE, Ascending, /*RadixBits*/8>(size, kernel_parameters);
test_general_cases<TEST_DATA_TYPE, Descending, /*RadixBits*/8>(size, kernel_parameters);
}
test_small_sizes<TEST_DATA_TYPE, Ascending, /*RadixBits*/8>(kernel_parameters);
#if TEST_LONG_RUN
for(auto size: sizes)
{
test_general_cases<TEST_DATA_TYPE, Ascending, RadixBits>(size, kernel_parameters);
test_general_cases<TEST_DATA_TYPE, Descending, RadixBits>(size, kernel_parameters);
}
test_small_sizes<TEST_DATA_TYPE, Ascending, RadixBits>(kernel_parameters);
#else
for(auto size: sizes)
for(auto size: sizes)
{
test_usm<TEST_DATA_TYPE, Ascending, RadixBits, sycl::usm::alloc::shared>(size, kernel_parameters);
test_usm<TEST_DATA_TYPE, Descending, RadixBits, sycl::usm::alloc::shared>(size, kernel_parameters);
}
#endif // TEST_LONG_RUN
}
catch (const ::std::exception& exc)
{
test_usm<TEST_DATA_TYPE, Ascending, /*RadixBits*/8, sycl::usm::alloc::shared>(size, kernel_parameters);
test_usm<TEST_DATA_TYPE, Descending, /*RadixBits*/8, sycl::usm::alloc::shared>(size, kernel_parameters);
std::cerr << "Exception: " << exc.what() << std::endl;
return EXIT_FAILURE;
}
#endif // TEST_LONG_RUN
}
catch (const ::std::exception& exc)
{
std::cout << "Exception: " << exc.what() << std::endl;
return EXIT_FAILURE;
}
#endif // TEST_DPCPP_BACKEND_PRESENT
}

return TestUtils::done(TEST_DPCPP_BACKEND_PRESENT);
return TestUtils::done(TEST_DPCPP_BACKEND_PRESENT && _can_run_test);
}

0 comments on commit 6afed28

Please sign in to comment.