Skip to content

Commit

Permalink
[LIBOMPTARGET] Adding AMD to llvm-omp-device-info
Browse files Browse the repository at this point in the history
Add device-information printing for AMD devices to the
`llvm-omp-device-info` command-line tool. The output format is modeled on
that of the `rocminfo` command-line tool.

This commit adds the missing HSA functions, enums, and structs
needed to query additional information from the HSA agents.
A generic message for the `generic-elf-64bit` plugin is also added.

Example of an output:
```
llvm-omp-device-info
Device (0):
    This is a generic-elf-64bit device

Device (1):
    This is a generic-elf-64bit device

Device (2):
    This is a generic-elf-64bit device

Device (3):
    This is a generic-elf-64bit device

Device (4):
    HSA Runtime Version:                1.1
    HSA OpenMP Device Number:           0
    Device Name:                        gfx906
    Vendor Name:                        AMD
    Device Type:                        GPU
    Max Queues:                         128
    Queue Min Size:                     64
    Queue Max Size:                     131072
    Cache:
      L0:                               16384 bytes
      L1:                               8388608 bytes
    Cacheline Size:                     64
    Max Clock Freq(MHz):                1725
    Compute Units:                      60
    SIMD per CU:                        4
    Fast F16 Operation:                 TRUE
    Wavefront Size:                     64
    Workgroup Max Size:                 1024
    Workgroup Max Size per Dimension:
      x:                                1024
      y:                                1024
      z:                                1024
    Max Waves Per CU:                   40
    Max Work-item Per CU:               2560
    Grid Max Size:                      4294967295
    Grid Max Size per Dimension:
      x:                                4294967295
      y:                                4294967295
      z:                                4294967295
    Max fbarriers/Workgrp:              32
    Memory Pools:
      Pool GLOBAL; FLAGS: COARSE GRAINED, :
        Size:                            34342961152 bytes
        Allocatable:                     TRUE
        Runtime Alloc Granule:           4096 bytes
        Runtime Alloc alignment:         4096 bytes
        Accessable by all:               FALSE
      Pool GLOBAL; FLAGS: FINE GRAINED, :
        Size:                            34342961152 bytes
        Allocatable:                     TRUE
        Runtime Alloc Granule:           4096 bytes
        Runtime Alloc alignment:         4096 bytes
        Accessable by all:               FALSE
      Pool GROUP:
        Size:                            65536 bytes
        Allocatable:                     FALSE
        Runtime Alloc Granule:           0 bytes
        Runtime Alloc alignment:         0 bytes
        Accessable by all:               FALSE

Device (5):
    HSA Runtime Version:                1.1
    HSA OpenMP Device Number:           1
    Device Name:                        gfx906
    Vendor Name:                        AMD
    Device Type:                        GPU
    Max Queues:                         128
    Queue Min Size:                     64
    Queue Max Size:                     131072
    Cache:
      L0:                               16384 bytes
      L1:                               8388608 bytes
    Cacheline Size:                     64
    Max Clock Freq(MHz):                1725
    Compute Units:                      60
    SIMD per CU:                        4
    Fast F16 Operation:                 TRUE
    Wavefront Size:                     64
    Workgroup Max Size:                 1024
    Workgroup Max Size per Dimension:
      x:                                1024
      y:                                1024
      z:                                1024
    Max Waves Per CU:                   40
    Max Work-item Per CU:               2560
    Grid Max Size:                      4294967295
    Grid Max Size per Dimension:
      x:                                4294967295
      y:                                4294967295
      z:                                4294967295
    Max fbarriers/Workgrp:              32
    Memory Pools:
      Pool GLOBAL; FLAGS: COARSE GRAINED, :
        Size:                            34342961152 bytes
        Allocatable:                     TRUE
        Runtime Alloc Granule:           4096 bytes
        Runtime Alloc alignment:         4096 bytes
        Accessable by all:               FALSE
      Pool GLOBAL; FLAGS: FINE GRAINED, :
        Size:                            34342961152 bytes
        Allocatable:                     TRUE
        Runtime Alloc Granule:           4096 bytes
        Runtime Alloc alignment:         4096 bytes
        Accessable by all:               FALSE
      Pool GROUP:
        Size:                            65536 bytes
        Allocatable:                     FALSE
        Runtime Alloc Granule:           0 bytes
        Runtime Alloc alignment:         0 bytes
        Accessable by all:               FALSE

Device (6):
    HSA Runtime Version:                1.1
    HSA OpenMP Device Number:           2
    Device Name:                        gfx906
    Vendor Name:                        AMD
    Device Type:                        GPU
    Max Queues:                         128
    Queue Min Size:                     64
    Queue Max Size:                     131072
    Cache:
      L0:                               16384 bytes
      L1:                               8388608 bytes
    Cacheline Size:                     64
    Max Clock Freq(MHz):                1725
    Compute Units:                      60
    SIMD per CU:                        4
    Fast F16 Operation:                 TRUE
    Wavefront Size:                     64
    Workgroup Max Size:                 1024
    Workgroup Max Size per Dimension:
      x:                                1024
      y:                                1024
      z:                                1024
    Max Waves Per CU:                   40
    Max Work-item Per CU:               2560
    Grid Max Size:                      4294967295
    Grid Max Size per Dimension:
      x:                                4294967295
      y:                                4294967295
      z:                                4294967295
    Max fbarriers/Workgrp:              32
    Memory Pools:
      Pool GLOBAL; FLAGS: COARSE GRAINED, :
        Size:                            34342961152 bytes
        Allocatable:                     TRUE
        Runtime Alloc Granule:           4096 bytes
        Runtime Alloc alignment:         4096 bytes
        Accessable by all:               FALSE
      Pool GLOBAL; FLAGS: FINE GRAINED, :
        Size:                            34342961152 bytes
        Allocatable:                     TRUE
        Runtime Alloc Granule:           4096 bytes
        Runtime Alloc alignment:         4096 bytes
        Accessable by all:               FALSE
      Pool GROUP:
        Size:                            65536 bytes
        Allocatable:                     FALSE
        Runtime Alloc Granule:           0 bytes
        Runtime Alloc alignment:         0 bytes
        Accessable by all:               FALSE

Device (7):
    HSA Runtime Version:                1.1
    HSA OpenMP Device Number:           3
    Device Name:                        gfx906
    Vendor Name:                        AMD
    Device Type:                        GPU
    Max Queues:                         128
    Queue Min Size:                     64
    Queue Max Size:                     131072
    Cache:
      L0:                               16384 bytes
      L1:                               8388608 bytes
    Cacheline Size:                     64
    Max Clock Freq(MHz):                1725
    Compute Units:                      60
    SIMD per CU:                        4
    Fast F16 Operation:                 TRUE
    Wavefront Size:                     64
    Workgroup Max Size:                 1024
    Workgroup Max Size per Dimension:
      x:                                1024
      y:                                1024
      z:                                1024
    Max Waves Per CU:                   40
    Max Work-item Per CU:               2560
    Grid Max Size:                      4294967295
    Grid Max Size per Dimension:
      x:                                4294967295
      y:                                4294967295
      z:                                4294967295
    Max fbarriers/Workgrp:              32
    Memory Pools:
      Pool GLOBAL; FLAGS: COARSE GRAINED, :
        Size:                            34342961152 bytes
        Allocatable:                     TRUE
        Runtime Alloc Granule:           4096 bytes
        Runtime Alloc alignment:         4096 bytes
        Accessable by all:               FALSE
      Pool GLOBAL; FLAGS: FINE GRAINED, :
        Size:                            34342961152 bytes
        Allocatable:                     TRUE
        Runtime Alloc Granule:           4096 bytes
        Runtime Alloc alignment:         4096 bytes
        Accessable by all:               FALSE
      Pool GROUP:
        Size:                            65536 bytes
        Allocatable:                     FALSE
        Runtime Alloc Granule:           0 bytes
        Runtime Alloc alignment:         0 bytes
        Accessable by all:               FALSE
```

Differential Revision: https://reviews.llvm.org/D126836
  • Loading branch information
josemonsalve2 committed Jun 9, 2022
1 parent 0abb472 commit 15ed5c0
Show file tree
Hide file tree
Showing 5 changed files with 327 additions and 0 deletions.
3 changes: 3 additions & 0 deletions openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
Expand Up @@ -23,8 +23,11 @@ DLWRAP_INTERNAL(hsa_init, 0);

// Each DLWRAP(name, N) entry names one HSA entry point. For the entries whose
// prototypes are declared in hsa.h (hsa_system_get_info, hsa_agent_get_info,
// hsa_isa_get_info_alt, hsa_iterate_agents, hsa_agent_iterate_isas), N equals
// the number of parameters of that prototype.
// NOTE(review): presumably DLWRAP emits a forwarding wrapper that is resolved
// at run time — confirm against the DLWRAP macro definition.
DLWRAP(hsa_status_string, 2);
DLWRAP(hsa_shut_down, 0);
DLWRAP(hsa_system_get_info, 2);
DLWRAP(hsa_agent_get_info, 3);
DLWRAP(hsa_isa_get_info_alt, 3);
DLWRAP(hsa_iterate_agents, 2);
DLWRAP(hsa_agent_iterate_isas, 3);
DLWRAP(hsa_signal_create, 4);
DLWRAP(hsa_signal_destroy, 1);
DLWRAP(hsa_signal_store_relaxed, 2);
Expand Down
35 changes: 35 additions & 0 deletions openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h
Expand Up @@ -55,23 +55,58 @@ typedef enum {
HSA_DEVICE_TYPE_DSP = 2
} hsa_device_type_t;

// Attribute selectors accepted by hsa_isa_get_info_alt. Only the ISA name
// attribute is declared here.
// NOTE(review): the numeric value presumably has to match the HSA runtime's
// own header — confirm before changing it.
typedef enum {
HSA_ISA_INFO_NAME = 1,
} hsa_isa_info_t;

// Attribute selectors accepted by hsa_agent_get_info. The numbering is sparse
// (e.g. 2, 3 and 5 are absent): only a subset of attributes is declared.
// NOTE(review): the numeric values presumably have to match the HSA runtime's
// own header, and each attribute has its own result type defined by the HSA
// specification — confirm both before adding or changing an entry.
typedef enum {
HSA_AGENT_INFO_NAME = 0,
HSA_AGENT_INFO_VENDOR_NAME = 1,
HSA_AGENT_INFO_PROFILE = 4,
HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8,
HSA_AGENT_INFO_GRID_MAX_DIM = 9,
HSA_AGENT_INFO_GRID_MAX_SIZE = 10,
HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11,
HSA_AGENT_INFO_QUEUES_MAX = 12,
HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13,
HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14,
HSA_AGENT_INFO_DEVICE = 17,
HSA_AGENT_INFO_CACHE_SIZE = 18,
HSA_AGENT_INFO_FAST_F16_OPERATION = 24,
} hsa_agent_info_t;

// Attribute selectors accepted by hsa_system_get_info: the major and minor
// components of the HSA runtime version.
typedef enum {
HSA_SYSTEM_INFO_VERSION_MAJOR = 0,
HSA_SYSTEM_INFO_VERSION_MINOR = 1,
} hsa_system_info_t;

// Handle type for an HSA region: a struct wrapping a single 64-bit handle
// value.
typedef struct hsa_region_s {
uint64_t handle;
} hsa_region_t;

// Handle type for an ISA: a struct wrapping a single 64-bit handle value.
// Values of this type are handed to the hsa_agent_iterate_isas callback and
// accepted by hsa_isa_get_info_alt.
typedef struct hsa_isa_s {
uint64_t handle;
} hsa_isa_t;

// Query a system-wide attribute; the result is written through `value`.
// NOTE(review): `value` presumably must point to storage of the type the HSA
// specification assigns to `attribute` — confirm against the specification.
hsa_status_t hsa_system_get_info(hsa_system_info_t attribute, void *value);

// Query an attribute of `agent`; the result is written through `value`.
// NOTE(review): same storage-type caveat as hsa_system_get_info.
hsa_status_t hsa_agent_get_info(hsa_agent_t agent, hsa_agent_info_t attribute,
void *value);

// Query an attribute of `isa`; the result is written through `value`.
// NOTE(review): same storage-type caveat as hsa_system_get_info.
hsa_status_t hsa_isa_get_info_alt(hsa_isa_t isa, hsa_isa_info_t attribute,
void *value);

// Invoke `callback` with an agent and the caller-supplied `data` pointer.
// NOTE(review): presumably called once per agent, stopping early when the
// callback returns a non-success status — confirm against the specification.
hsa_status_t hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent,
void *data),
void *data);

// Invoke `callback` with an ISA of `agent` and the caller-supplied `data`
// pointer.
// NOTE(review): presumably called once per ISA supported by the agent —
// confirm against the specification.
hsa_status_t hsa_agent_iterate_isas(hsa_agent_t agent,
hsa_status_t (*callback)(hsa_isa_t isa,
void *data),
void *data);

// Handle type for an HSA signal: a struct wrapping a single 64-bit handle
// value.
typedef struct hsa_signal_s {
uint64_t handle;
} hsa_signal_t;
Expand Down
17 changes: 17 additions & 0 deletions openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa_ext_amd.h
Expand Up @@ -29,9 +29,20 @@ typedef enum hsa_amd_memory_pool_global_flag_s {
} hsa_amd_memory_pool_global_flag_t;

// Memory segment a memory pool belongs to; this is the value obtained when a
// pool is queried with HSA_AMD_MEMORY_POOL_INFO_SEGMENT.
// NOTE(review): the numeric values presumably have to match the AMD HSA
// extension header — confirm before changing them.
typedef enum {
HSA_AMD_SEGMENT_GLOBAL = 0,
HSA_AMD_SEGMENT_READONLY = 1,
HSA_AMD_SEGMENT_PRIVATE = 2,
HSA_AMD_SEGMENT_GROUP = 3,
} hsa_amd_segment_t;

// Attribute selectors accepted by hsa_amd_memory_pool_get_info. The numbering
// is sparse (3, 4 and 8-14 are absent): only a subset is declared.
// NOTE(review): the numeric values presumably have to match the AMD HSA
// extension header, and each attribute has its own result type — confirm both
// before adding or changing an entry.
typedef enum {
HSA_AMD_MEMORY_POOL_INFO_SEGMENT = 0,
HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS = 1,
HSA_AMD_MEMORY_POOL_INFO_SIZE = 2,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED = 5,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
} hsa_amd_memory_pool_info_t;

typedef enum {
Expand All @@ -43,7 +54,13 @@ typedef enum {
} hsa_amd_memory_pool_access_t;

// AMD-specific agent attribute selectors. Callers cast these to
// hsa_agent_info_t and pass them to hsa_agent_get_info; all values sit in the
// 0xA0xx range, away from the small values used by hsa_agent_info_t.
// NOTE(review): the numeric values presumably have to match the AMD HSA
// extension header — confirm before adding or changing an entry.
typedef enum hsa_amd_agent_info_s {
HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001,
HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002,
HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003,
HSA_AMD_AGENT_INFO_PRODUCT_NAME = 0xA009,
HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010
} hsa_amd_agent_info_t;

hsa_status_t hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool,
Expand Down
268 changes: 268 additions & 0 deletions openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Expand Up @@ -282,6 +282,16 @@ static void callbackQueue(hsa_status_t status, hsa_queue_t *source,

namespace core {
namespace {

/// Check the status of an HSA call and report it when it is not a success.
///
/// \param Err    Status code returned by the HSA call.
/// \param ErrMsg Caller-supplied context message, reported before the error
///               string derived from \p Err.
/// \returns true when \p Err is HSA_STATUS_SUCCESS, false otherwise.
bool checkResult(hsa_status_t Err, const char *ErrMsg) {
  const bool Succeeded = (Err == HSA_STATUS_SUCCESS);
  if (!Succeeded) {
    // Context first, then the textual form of the status code.
    REPORT("%s", ErrMsg);
    REPORT("%s", get_error_string(Err));
  }
  return Succeeded;
}

/// Atomically store a 32-bit word into \p packet with release ordering.
///
/// \param packet Destination word.
/// \param header Value placed in the low 16 bits of the stored word.
/// \param rest   Value placed in the high 16 bits of the stored word.
void packet_store_release(uint32_t *packet, uint16_t header, uint16_t rest) {
  // Widen to uint32_t before shifting: a uint16_t operand is promoted to
  // (signed) int, and shifting a value >= 0x8000 left by 16 would move a bit
  // into the sign position.
  const uint32_t Word =
      static_cast<uint32_t>(header) | (static_cast<uint32_t>(rest) << 16);
  __atomic_store_n(packet, Word, __ATOMIC_RELEASE);
}
Expand Down Expand Up @@ -542,6 +552,256 @@ class RTLDeviceInfoTy : HSALifetime {
return freesignalpool_memcpy(dest, src, size, impl_memcpy_h2d, deviceId);
}

static void printDeviceInfo(int32_t device_id, hsa_agent_t agent) {
char TmpChar[1000];
uint16_t major, minor;
uint32_t TmpUInt;
uint32_t TmpUInt2;
uint32_t CacheSize[4];
bool TmpBool;
uint16_t workgroupMaxDim[3];
hsa_dim3_t gridMaxDim;

// Getting basic information about HSA and Device
core::checkResult(
hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &major),
"Error from hsa_system_get_info when obtaining "
"HSA_SYSTEM_INFO_VERSION_MAJOR\n");
core::checkResult(
hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &minor),
"Error from hsa_system_get_info when obtaining "
"HSA_SYSTEM_INFO_VERSION_MINOR\n");
printf(" HSA Runtime Version: \t\t%u.%u \n", major, minor);
printf(" HSA OpenMP Device Number: \t\t%d \n", device_id);
core::checkResult(
hsa_agent_get_info(
agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, TmpChar),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AMD_AGENT_INFO_PRODUCT_NAME\n");
printf(" Product Name: \t\t\t%s \n", TmpChar);
core::checkResult(hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, TmpChar),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_NAME\n");
printf(" Device Name: \t\t\t%s \n", TmpChar);
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, TmpChar),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_NAME\n");
printf(" Vendor Name: \t\t\t%s \n", TmpChar);
hsa_device_type_t devType;
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &devType),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_DEVICE\n");
printf(" Device Type: \t\t\t%s \n",
devType == HSA_DEVICE_TYPE_CPU
? "CPU"
: (devType == HSA_DEVICE_TYPE_GPU
? "GPU"
: (devType == HSA_DEVICE_TYPE_DSP ? "DSP" : "UNKNOWN")));
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_QUEUES_MAX\n");
printf(" Max Queues: \t\t\t%u \n", TmpUInt);
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_QUEUE_MIN_SIZE\n");
printf(" Queue Min Size: \t\t\t%u \n", TmpUInt);
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_QUEUE_MAX_SIZE\n");
printf(" Queue Max Size: \t\t\t%u \n", TmpUInt);

// Getting cache information
printf(" Cache:\n");

// FIXME: This is deprecated according to HSA documentation. But using
// hsa_agent_iterate_caches and hsa_cache_get_info breaks execution during
// runtime.
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, CacheSize),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_CACHE_SIZE\n");

for (int i = 0; i < 4; i++) {
if (CacheSize[i]) {
printf(" L%u: \t\t\t\t%u bytes\n", i, CacheSize[i]);
}
}

core::checkResult(
hsa_agent_get_info(agent,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_CACHELINE_SIZE,
&TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AMD_AGENT_INFO_CACHELINE_SIZE\n");
printf(" Cacheline Size: \t\t\t%u \n", TmpUInt);
core::checkResult(
hsa_agent_get_info(
agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY,
&TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY\n");
printf(" Max Clock Freq(MHz): \t\t%u \n", TmpUInt);
core::checkResult(
hsa_agent_get_info(
agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
&TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT\n");
printf(" Compute Units: \t\t\t%u \n", TmpUInt);
core::checkResult(hsa_agent_get_info(
agent,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU,
&TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU\n");
printf(" SIMD per CU: \t\t\t%u \n", TmpUInt);
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &TmpBool),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU\n");
printf(" Fast F16 Operation: \t\t%s \n", (TmpBool ? "TRUE" : "FALSE"));
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &TmpUInt2),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_WAVEFRONT_SIZE\n");
printf(" Wavefront Size: \t\t\t%u \n", TmpUInt2);
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_WORKGROUP_MAX_SIZE\n");
printf(" Workgroup Max Size: \t\t%u \n", TmpUInt);
core::checkResult(hsa_agent_get_info(agent,
HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
workgroupMaxDim),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_WORKGROUP_MAX_DIM\n");
printf(" Workgroup Max Size per Dimension:\n");
printf(" x: \t\t\t\t%u\n", workgroupMaxDim[0]);
printf(" y: \t\t\t\t%u\n", workgroupMaxDim[1]);
printf(" z: \t\t\t\t%u\n", workgroupMaxDim[2]);
core::checkResult(hsa_agent_get_info(
agent,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU,
&TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU\n");
printf(" Max Waves Per CU: \t\t\t%u \n", TmpUInt);
printf(" Max Work-item Per CU: \t\t%u \n", TmpUInt * TmpUInt2);
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_GRID_MAX_SIZE\n");
printf(" Grid Max Size: \t\t\t%u \n", TmpUInt);
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &gridMaxDim),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_GRID_MAX_DIM\n");
printf(" Grid Max Size per Dimension: \t\t\n");
printf(" x: \t\t\t\t%u\n", gridMaxDim.x);
printf(" y: \t\t\t\t%u\n", gridMaxDim.y);
printf(" z: \t\t\t\t%u\n", gridMaxDim.z);
core::checkResult(
hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &TmpUInt),
"Error returned from hsa_agent_get_info when obtaining "
"HSA_AGENT_INFO_FBARRIER_MAX_SIZE\n");
printf(" Max fbarriers/Workgrp: \t\t%u\n", TmpUInt);

printf(" Memory Pools:\n");
auto CB_mem = [](hsa_amd_memory_pool_t region, void *data) -> hsa_status_t {
std::string TmpStr;
size_t size;
bool alloc, access;
hsa_amd_segment_t segment;
hsa_amd_memory_pool_global_flag_t globalFlags;
core::checkResult(
hsa_amd_memory_pool_get_info(
region, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &globalFlags),
"Error returned from hsa_amd_memory_pool_get_info when obtaining "
"HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS\n");
core::checkResult(hsa_amd_memory_pool_get_info(
region, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment),
"Error returned from hsa_amd_memory_pool_get_info when "
"obtaining HSA_AMD_MEMORY_POOL_INFO_SEGMENT\n");

switch (segment) {
case HSA_AMD_SEGMENT_GLOBAL:
TmpStr = "GLOBAL; FLAGS: ";
if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & globalFlags)
TmpStr += "KERNARG, ";
if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & globalFlags)
TmpStr += "FINE GRAINED, ";
if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED & globalFlags)
TmpStr += "COARSE GRAINED, ";
break;
case HSA_AMD_SEGMENT_READONLY:
TmpStr = "READONLY";
break;
case HSA_AMD_SEGMENT_PRIVATE:
TmpStr = "PRIVATE";
break;
case HSA_AMD_SEGMENT_GROUP:
TmpStr = "GROUP";
break;
}
printf(" Pool %s: \n", TmpStr.c_str());

core::checkResult(hsa_amd_memory_pool_get_info(
region, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size),
"Error returned from hsa_amd_memory_pool_get_info when "
"obtaining HSA_AMD_MEMORY_POOL_INFO_SIZE\n");
printf(" Size: \t\t\t\t %zu bytes\n", size);
core::checkResult(
hsa_amd_memory_pool_get_info(
region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc),
"Error returned from hsa_amd_memory_pool_get_info when obtaining "
"HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED\n");
printf(" Allocatable: \t\t\t %s\n", (alloc ? "TRUE" : "FALSE"));
core::checkResult(
hsa_amd_memory_pool_get_info(
region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &size),
"Error returned from hsa_amd_memory_pool_get_info when obtaining "
"HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE\n");
printf(" Runtime Alloc Granule: \t\t %zu bytes\n", size);
core::checkResult(
hsa_amd_memory_pool_get_info(
region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &size),
"Error returned from hsa_amd_memory_pool_get_info when obtaining "
"HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT\n");
printf(" Runtime Alloc alignment: \t %zu bytes\n", size);
core::checkResult(
hsa_amd_memory_pool_get_info(
region, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access),
"Error returned from hsa_amd_memory_pool_get_info when obtaining "
"HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL\n");
printf(" Accessable by all: \t\t %s\n",
(access ? "TRUE" : "FALSE"));

return HSA_STATUS_SUCCESS;
};
// Iterate over all the memory regions for this agent. Get the memory region
// type and size
hsa_amd_agent_iterate_memory_pools(agent, CB_mem, nullptr);

printf(" ISAs:\n");
auto CB_isas = [](hsa_isa_t isa, void *data) -> hsa_status_t {
char TmpChar[1000];
core::checkResult(hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, TmpChar),
"Error returned from hsa_isa_get_info_alt when "
"obtaining HSA_ISA_INFO_NAME\n");
printf(" Name: \t\t\t\t %s\n", TmpChar);

return HSA_STATUS_SUCCESS;
};
// Iterate over all the memory regions for this agent. Get the memory region
// type and size
hsa_agent_iterate_isas(agent, CB_isas, nullptr);
}

// Record entry point associated with device
void addOffloadEntry(int32_t device_id, __tgt_offload_entry entry) {
assert(device_id < (int32_t)FuncGblEntries.size() &&
Expand Down Expand Up @@ -2338,4 +2598,12 @@ int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {
}
return OFFLOAD_SUCCESS;
}

void __tgt_rtl_print_device_info(int32_t device_id) {
// TODO: Assertion to see if device_id is correct
// NOTE: We don't need to set context for print device info.

DeviceInfo.printDeviceInfo(device_id, DeviceInfo.HSAAgents[device_id]);
}

} // extern "C"

0 comments on commit 15ed5c0

Please sign in to comment.