Skip to content

Commit

Permalink
Merge pull request #1408 from pjaaskel/pocl-r-svm-spv-offsetting
Browse files Browse the repository at this point in the history
PoCL-R: Experimental SVM region offset mitigation via SPV manipulation among other improvements
  • Loading branch information
pjaaskel committed Feb 9, 2024
2 parents bb3c812 + 96eae53 commit 67b9546
Show file tree
Hide file tree
Showing 25 changed files with 771 additions and 189 deletions.
4 changes: 4 additions & 0 deletions config.h.in.cmake
Expand Up @@ -194,6 +194,8 @@

#define LLVM_SPIRV "@LLVM_SPIRV@"

#define LLVM_OPT "@LLVM_OPT@"

#cmakedefine LLVM_MAJOR @LLVM_VERSION_MAJOR@

// minimum LLVM version at which to enable new PM
Expand Down Expand Up @@ -234,6 +236,8 @@

#define POCL_INSTALL_PRIVATE_LIBDIR "@POCL_INSTALL_PRIVATE_LIBDIR@"

#define POCL_INSTALL_LIBDIR "@POCL_INSTALL_PUBLIC_LIBDIR@"

#define POCL_INSTALL_PRIVATE_LIBDIR_REL "@POCL_INSTALL_PRIVATE_LIBDIR_REL@"

#cmakedefine POCL_ASSERTS_BUILD
Expand Down
28 changes: 22 additions & 6 deletions doc/sphinx/source/notes_6_0.rst
Expand Up @@ -2,19 +2,15 @@
Release Notes for PoCL 6.0
**************************

Support for LLVM versions 10 to 13 inclusive has been removed.
LLVM 14 to 17 are supported.

Support for `cl_khr_spir` (SPIR 1.x/2.0) has been removed.
SPIR-V remains supported.

============================
New device driver: cpu-tbb
============================

The cpu-tbb device driver uses the Intel oneAPI Threading Building Blocks (oneTBB)
library for work-group and kernel-level task scheduling. Except for the
scheduling, the driver is identical to the original 'cpu' driver (pthread).
task scheduler, the driver is identical to the original 'cpu' driver (pthread).

===========================
Driver-specific features
Expand All @@ -29,15 +25,35 @@ Support is disabled by default, but can be enabled with CMake option. The
'cpu-minimal' driver does not support OpenMP.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Remote: Basis for the coarse-grain SVM support
Remote
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Basis for the coarse-grain SVM support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The coarse-grain (CG) SVM support works only if the client manages to
mmap() the SVM pool allocated on the device side to the same address
it has on the server side. This is a work in progress, but it is usable
for testing client apps and libraries that require CG SVM, as the
mapping seems to succeed often enough.

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
clCompileProgram() and clLinkProgram()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Basic compile and link support. Tested with the conformance suite's
``compiler/test_compiler`` test cases ``execute_after_simple_compile_and_link``,
``execute_after_simple_compile_and_link_no_device_info`` and
``execute_after_two_file_link``, as well as with
`chipStar <https://github.com/CHIP-SPV/chipStar>`_, which uses the API for
enhanced SPIR-V portability.

===================================
Deprecation/feature removal notices
===================================

Support for LLVM versions 10 to 13 inclusive has been removed.
LLVM 14 to 17 are supported.

Support for `cl_khr_spir` (SPIR 1.x/2.0) has been removed.
SPIR-V remains supported.
11 changes: 10 additions & 1 deletion include/messages.h
Expand Up @@ -112,8 +112,11 @@ extern "C"
MessageType_BuildProgramFromBinary,
MessageType_BuildProgramWithBuiltins,
// Special message type for SPIR-V IL for now. No support for
// vendor-specific ILs yet.
// vendor-specific ILs.
MessageType_BuildProgramFromSPIRV,
MessageType_CompileProgramFromSPIRV,
MessageType_CompileProgramFromSource,
MessageType_LinkProgram,
MessageType_FreeProgram,

// ***********************************************
Expand Down Expand Up @@ -559,6 +562,9 @@ extern "C"
{
uint64_t payload_size;
uint64_t options_len;
// Nonzero if the program's memory accesses should be offset-adjusted to
// account for the difference between the SVM region start addresses on
// the remote device and on the host.
uint64_t svm_region_offset;
uint32_t num_devices;
uint32_t devices[MAX_REMOTE_DEVICES];
uint32_t platforms[MAX_REMOTE_DEVICES];
Expand Down Expand Up @@ -792,7 +798,10 @@ extern "C"
case MessageType_BuildProgramFromSource:
case MessageType_BuildProgramFromBinary:
case MessageType_BuildProgramFromSPIRV:
case MessageType_CompileProgramFromSource:
case MessageType_CompileProgramFromSPIRV:
case MessageType_BuildProgramWithBuiltins:
case MessageType_LinkProgram:
body = sizeof (BuildProgramMsg_t);
break;

Expand Down
15 changes: 1 addition & 14 deletions lib/CL/clCommandSVMMemFillKHR.c
Expand Up @@ -41,22 +41,9 @@ POname (clCommandSVMMemFillKHR) (

CMDBUF_VALIDATE_COMMON_HANDLES;

errcode = pocl_svm_memfill_common (
return pocl_svm_memfill_common (
command_buffer, command_queue, CL_COMMAND_SVM_MEMFILL, svm_ptr, size,
pattern, pattern_size, num_sync_points_in_wait_list, NULL, NULL,
sync_point_wait_list, sync_point, &cmd);

if (errcode != CL_SUCCESS)
return errcode;

errcode = pocl_command_record (command_buffer, cmd, sync_point);
if (errcode != CL_SUCCESS)
goto ERROR;

return CL_SUCCESS;

ERROR:
pocl_mem_manager_free_command (cmd);
return errcode;
}
POsym (clCommandSVMMemFillKHR)
56 changes: 21 additions & 35 deletions lib/CL/clEnqueueSVMMemFill.c
Expand Up @@ -77,35 +77,30 @@ pocl_svm_memfill_common (cl_command_buffer_khr command_buffer,
if (errcode != CL_SUCCESS)
return errcode;

/* Utilize the SVM shadow buffers to share code with cl_mem buffer fill
code. */

pocl_svm_ptr *dst_svm_ptr = pocl_find_svm_ptr_in_context (context, svm_ptr);

void *cmd_pattern = pocl_aligned_malloc (pattern_size, pattern_size);
POCL_RETURN_ERROR_COND ((cmd_pattern == NULL), CL_OUT_OF_HOST_MEMORY);

if (command_buffer == NULL)
{
errcode = pocl_check_event_wait_list (
command_queue, num_items_in_wait_list, event_wait_list);
if (errcode != CL_SUCCESS)
return errcode;
errcode = pocl_create_command (cmd, command_queue, command_type, event,
num_items_in_wait_list, event_wait_list,
0, NULL, NULL);
}
size_t offset = svm_ptr - dst_svm_ptr->svm_ptr;
if (command_buffer)
errcode = POname (clCommandFillBufferKHR) (
command_buffer, command_queue, dst_svm_ptr->shadow_cl_mem, pattern,
pattern_size, offset, size, num_items_in_wait_list,
sync_point_wait_list, sync_point, NULL);
else
{
errcode = pocl_create_recorded_command (
cmd, command_buffer, command_queue, command_type,
num_items_in_wait_list, sync_point_wait_list, 0, NULL, NULL);
}
errcode = POname (clEnqueueFillBuffer) (
command_queue, dst_svm_ptr->shadow_cl_mem, pattern, pattern_size,
offset, size, num_items_in_wait_list, event_wait_list, event);

if (errcode != CL_SUCCESS)
return errcode;

_cl_command_node *c = *cmd;

memcpy (cmd_pattern, pattern, pattern_size);
c->command.svm_fill.svm_ptr = svm_ptr;
c->command.svm_fill.size = size;
c->command.svm_fill.pattern = cmd_pattern;
c->command.svm_fill.pattern_size = pattern_size;
if (event != NULL)
(*event)->command_type = command_type;

return CL_SUCCESS;
}
Expand All @@ -117,18 +112,9 @@ POname (clEnqueueSVMMemFill) (cl_command_queue command_queue, void *svm_ptr,
const cl_event *event_wait_list,
cl_event *event) CL_API_SUFFIX__VERSION_2_0
{
cl_int errcode;
_cl_command_node *cmd = NULL;

errcode = pocl_svm_memfill_common (
NULL, command_queue, CL_COMMAND_SVM_MEMFILL, svm_ptr, size, pattern,
pattern_size, num_events_in_wait_list, event_wait_list, event, NULL,
NULL, &cmd);
if (errcode != CL_SUCCESS)
return errcode;

pocl_command_enqueue (command_queue, cmd);

return CL_SUCCESS;
return pocl_svm_memfill_common (NULL, command_queue, CL_COMMAND_SVM_MEMFILL,
svm_ptr, size, pattern, pattern_size,
num_events_in_wait_list, event_wait_list,
event, NULL, NULL, NULL);
}
POsym(clEnqueueSVMMemFill)
4 changes: 2 additions & 2 deletions lib/CL/clFinalizeCommandBufferKHR.c
Expand Up @@ -47,10 +47,10 @@ POname (clFinalizeCommandBufferKHR) (cl_command_buffer_khr command_buffer)
POCL_RETURN_ERROR_COND ((finalized_devs == NULL), CL_OUT_OF_HOST_MEMORY);

cl_command_queue *q = command_buffer->queues;
for (int i = 0; i < command_buffer->num_queues; ++i, ++q)
for (cl_uint i = 0; i < command_buffer->num_queues; ++i, ++q)
{
int is_done = 0;
for (int j = 0; j < num_finalized; ++j)
for (unsigned int j = 0; j < num_finalized; ++j)
{
if (finalized_devs[j] == (*q)->device)
is_done = 1;
Expand Down
4 changes: 2 additions & 2 deletions lib/CL/devices/common_driver.c
Expand Up @@ -482,7 +482,7 @@ pocl_driver_svm_copy (cl_device_id dev,
/* load LLVM IR binary from disk, deletes existing in-memory IR */
static int
pocl_reload_program_bc (char *program_bc_path, cl_program program,
cl_uint device_i)
cl_uint device_i)
{
char *temp_binary = NULL;
uint64_t temp_size = 0;
Expand All @@ -491,7 +491,7 @@ pocl_reload_program_bc (char *program_bc_path, cl_program program,
return -1;
if (program->binaries[device_i])
POCL_MEM_FREE (program->binaries[device_i]);
program->binaries[device_i] = temp_binary;
program->binaries[device_i] = (unsigned char*)temp_binary;
program->binary_sizes[device_i] = temp_size;
return 0;
}
Expand Down
3 changes: 1 addition & 2 deletions lib/CL/devices/devices.c
Expand Up @@ -144,8 +144,7 @@ pocl_get_device_name (unsigned index)
return device->long_name;
}
}
else
return NULL;
return NULL;
}

/* Init function prototype */
Expand Down
52 changes: 32 additions & 20 deletions lib/CL/devices/remote/communication.c
Expand Up @@ -2035,13 +2035,6 @@ pocl_network_create_buffer (remote_device_data_t *ddata, cl_mem mem,
assert (mem->size > 0);
assert (mem->flags != 0);

assert (mem->mem_host_ptr == 0 || (mem->flags & CL_MEM_USES_SVM_POINTER != 0)
|| (size_t)mem->mem_host_ptr < ddata->device_svm_region_start_addr
|| (size_t)mem->mem_host_ptr
> ddata->device_svm_region_start_addr
+ ddata->device_svm_region_start_addr
+ ddata->device_svm_region_size);

nc.request.m.create_buffer.flags = mem->flags;
nc.request.m.create_buffer.size = mem->size;
// see https://www.gnu.org/software/c-intro-and-ref/manual/html_node/Pointer_002dInteger-Conversion.html
Expand Down Expand Up @@ -2274,15 +2267,27 @@ pocl_network_setup_peer_mesh ()
return CL_SUCCESS;
}

/**
* Build, compile or link a program remotely.
*
* \param [in] payload The sources or binaries, if compiling/building, or a
* list of program ids, if linking only.
* \param [in] is_binary, is_builtin, is_spirv Define the input type. If we
* are only linking previously compiled programs, setting these makes no
* difference.
* \param [in] svm_region_offset Nonzero offset if the build process should
* adjust the memory accesses of the program to account for the offset
* between the SVM regions.
* \param [in] compile_only Set to 1 if compiling without linking.
* Otherwise 0.
* \param [in] link_only Set to 1 if only linking previously compiled
* programs. Otherwise 0.
*/
cl_int
pocl_network_build_program (remote_device_data_t *ddata, const void *payload,
size_t payload_size, int is_binary, int is_builtin,
int is_spirv, uint32_t prog_id,
const char *options, char **kernel_meta_bytes,
size_t *kernel_meta_size, uint32_t *devices,
uint32_t *platforms, size_t num_devices,
char **build_logs, char **binaries,
size_t *binary_sizes)
pocl_network_build_or_link_program (
remote_device_data_t *ddata, const void *payload, size_t payload_size,
int is_binary, int is_builtin, int is_spirv, uint32_t prog_id,
const char *options, char **kernel_meta_bytes, size_t *kernel_meta_size,
uint32_t *devices, uint32_t *platforms, size_t num_devices,
char **build_logs, char **binaries, size_t *binary_sizes,
size_t svm_region_offset, int compile_only, int link_only)
{
size_t i, j;
REMOTE_SERV_DATA2;
Expand All @@ -2295,17 +2300,23 @@ pocl_network_build_program (remote_device_data_t *ddata, const void *payload,
POCL_MEASURE_START (REMOTE_BUILD_PROGRAM);

ID_REQUEST (ReadBuffer, prog_id);
if (is_spirv)
nc.request.message_type = MessageType_BuildProgramFromSPIRV;
if (link_only)
nc.request.message_type = MessageType_LinkProgram;
else if (is_spirv)
nc.request.message_type = compile_only ?
MessageType_CompileProgramFromSPIRV : MessageType_BuildProgramFromSPIRV;
else if (is_builtin)
nc.request.message_type = MessageType_BuildProgramWithBuiltins;
else if (is_binary)
nc.request.message_type = MessageType_BuildProgramFromBinary;
else
nc.request.message_type = MessageType_BuildProgramFromSource;
nc.request.message_type = compile_only
? MessageType_CompileProgramFromSource
: MessageType_BuildProgramFromSource;

nc.request.m.build_program.payload_size = payload_size;
nc.request.m.build_program.options_len = options ? strlen (options) : 0;
nc.request.m.build_program.svm_region_offset = svm_region_offset;

nc.request.m.build_program.num_devices = num_devices;
assert (num_devices < MAX_REMOTE_DEVICES);
Expand All @@ -2323,13 +2334,14 @@ pocl_network_build_program (remote_device_data_t *ddata, const void *payload,
= pocl_aligned_malloc (MAX_EXTENDED_ALIGNMENT, MAX_BUILD_SIZE);
nc.rep_extra_size = MAX_BUILD_SIZE;

POCL_MSG_PRINT_REMOTE ("BuildProgram %p\n", netcmd);
POCL_MSG_PRINT_REMOTE ("Compile/Build/LinkProgram %p\n", netcmd);

SEND_REQ_FAST;

wait_on_netcmd (netcmd);

POCL_MSG_PRINT_REMOTE ("BuildProgram reply DATA: %zu\n", nc.reply.data_size);
POCL_MSG_PRINT_REMOTE ("Compile/Build/LinkProgram reply DATA: %zu\n",
nc.reply.data_size);
POCL_MEASURE_FINISH (REMOTE_BUILD_PROGRAM);

char *buffer = nc.rep_extra_data;
Expand Down
8 changes: 5 additions & 3 deletions lib/CL/devices/remote/communication.h
Expand Up @@ -254,7 +254,8 @@ typedef struct remote_server_data_s
} \
while (0)

#define UNSET_REMOTE_ID(map, id) \

#define UNSET_REMOTE_ID(map, id) \
do \
{ \
small_vector_remove_##map##_ids (data, id); \
Expand Down Expand Up @@ -371,12 +372,13 @@ cl_int pocl_network_setup_metadata (char *buffer, size_t total_size,
cl_program program, size_t *num_kernels,
pocl_kernel_metadata_t **kernel_meta);

cl_int pocl_network_build_program (
cl_int pocl_network_build_or_link_program (
remote_device_data_t *ddata, const void *payload, size_t payload_size,
int is_binary, int is_builtin, int is_spirv, uint32_t prog_id,
const char *options, char **kernel_meta_bytes, size_t *kernel_meta_size,
uint32_t *devices, uint32_t *platforms, size_t num_devices,
char **build_log, char **binaries, size_t *binary_sizes);
char **build_log, char **binaries, size_t *binary_sizes,
size_t svm_region_offset, int compile_only, int link_only);

cl_int pocl_network_free_program (remote_device_data_t *ddata,
uint32_t prog_id);
Expand Down

0 comments on commit 67b9546

Please sign in to comment.