Skip to content

Commit

Permalink
[libomptarget][amdgpu] Improve diagnostics on arch mismatch
Browse files Browse the repository at this point in the history
  • Loading branch information
JonChesterfield committed Dec 9, 2020
1 parent e6a1187 commit cab9f69
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 22 deletions.
1 change: 1 addition & 0 deletions openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
Expand Up @@ -57,6 +57,7 @@ add_library(omptarget.rtl.amdgpu SHARED
impl/atmi.cpp
impl/atmi_interop_hsa.cpp
impl/data.cpp
impl/get_elf_mach_gfx_name.cpp
impl/machine.cpp
impl/system.cpp
impl/utils.cpp
Expand Down
53 changes: 53 additions & 0 deletions openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
@@ -0,0 +1,53 @@
#include "get_elf_mach_gfx_name.h"

// The LLVM header below conflicts with the system elf.h (the same identifiers
// are macros in one and enumerators in the other), so the two cannot be
// included together. It also contains more up-to-date values for the enum
// checked here. rtl.cpp uses the system elf.h.
#include "llvm/BinaryFormat/ELF.h"

/// Translate the machine field of an AMDGPU ELF e_flags word into a gfx name.
///
/// Only the bits selected by EF_AMDGPU_MACH are examined; every other bit of
/// \p EFlags is ignored.
///
/// \param EFlags the e_flags value taken from an ELF header.
/// \returns a string literal such as "gfx906" for a recognised machine value,
/// or "--unknown gfx" when the value is not in the table below.
const char *get_elf_mach_gfx_name(uint32_t EFlags) {
  using namespace llvm::ELF;

  // One row per recognised machine value and the name reported for it.
  struct MachNameTy {
    uint32_t Mach;
    const char *Name;
  };
  static const MachNameTy KnownMachs[] = {
      {EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"},
      {EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"},
      {EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"},
      {EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"},
      {EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"},
      {EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"},
      {EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"},
      {EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"},
      {EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"},
      {EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"},
      {EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"},
      {EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"},
  };

  const uint32_t Mach = EFlags & EF_AMDGPU_MACH;
  for (const MachNameTy &Entry : KnownMachs) {
    if (Entry.Mach == Mach)
      return Entry.Name;
  }
  return "--unknown gfx";
}
@@ -0,0 +1,8 @@
// Declares a helper that names the gfx architecture encoded in an ELF
// header's e_flags value.
#ifndef GET_ELF_MACH_GFX_NAME_H_INCLUDED
#define GET_ELF_MACH_GFX_NAME_H_INCLUDED

#include <stdint.h>

/// Return the gfx name (for example "gfx906") selected by the machine bits of
/// \p EFlags, or "--unknown gfx" when the value is not recognised. The result
/// is a string literal and must not be freed.
const char *get_elf_mach_gfx_name(uint32_t EFlags);

#endif
80 changes: 58 additions & 22 deletions openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Expand Up @@ -36,6 +36,7 @@
#include "internal.h"

#include "Debug.h"
#include "get_elf_mach_gfx_name.h"
#include "omptargetplugin.h"

#include "llvm/Frontend/OpenMP/OMPGridValues.h"
Expand Down Expand Up @@ -92,14 +93,6 @@ uint32_t TgtStackItemSize = 0;

#include "../../common/elf_common.c"

/// Report whether elf_check_machine accepts \p image for the amdgcn machine ID.
///
/// Emits a debug message when the check fails.
static bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
  // Machine ID value handed to elf_check_machine for amdgcn images.
  const uint16_t amdgcnMachineID = 224;
  const bool is_amdgcn = elf_check_machine(image, amdgcnMachineID) != 0;
  if (is_amdgcn) {
    return true;
  }
  DP("Supported machine ID not found\n");
  return false;
}

/// Keep entries table per device
struct FuncOrGblEntryTy {
Expand Down Expand Up @@ -319,6 +312,7 @@ class RTLDeviceInfoTy {
std::vector<int> GroupsPerDevice;
std::vector<int> ThreadsPerGroup;
std::vector<int> WarpSize;
std::vector<std::string> GPUName;

// OpenMP properties
std::vector<int> NumTeams;
Expand Down Expand Up @@ -472,6 +466,7 @@ class RTLDeviceInfoTy {
FuncGblEntries.resize(NumberOfDevices);
ThreadsPerGroup.resize(NumberOfDevices);
ComputeUnits.resize(NumberOfDevices);
GPUName.resize(NumberOfDevices);
GroupsPerDevice.resize(NumberOfDevices);
WarpSize.resize(NumberOfDevices);
NumTeams.resize(NumberOfDevices);
Expand Down Expand Up @@ -642,6 +637,40 @@ void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
assert(async_info_ptr->Queue);
async_info_ptr->Queue = 0;
}

/// Report whether elf_check_machine accepts \p image for EM_AMDGPU.
///
/// Emits a debug message when the check fails.
bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
  const uint16_t amdgcnMachineID = EM_AMDGPU;
  const int32_t check = elf_check_machine(image, amdgcnMachineID);
  const bool matches = (check != 0);
  if (!matches) {
    DP("Supported machine ID not found\n");
  }
  return matches;
}

/// Read the e_flags field from the ELF header of a device image.
///
/// The bytes between image->ImageStart and image->ImageEnd are opened in place
/// with elf_memory and the header is fetched with elf64_getehdr.
///
/// \param image device image whose ImageStart/ImageEnd delimit the ELF bytes.
/// \returns the header's e_flags value, or 0 when either the ELF handle or the
/// 64-bit header could not be obtained (a debug message is emitted in both
/// failure cases).
uint32_t elf_e_flags(__tgt_device_image *image) {
  char *img_begin = (char *)image->ImageStart;
  size_t img_size = (char *)image->ImageEnd - img_begin;

  Elf *e = elf_memory(img_begin, img_size);
  if (!e) {
    DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
    return 0;
  }

  // Flags keeps the failure value 0 unless a header is actually available.
  uint32_t Flags = 0;
  Elf64_Ehdr *eh64 = elf64_getehdr(e);
  if (eh64) {
    Flags = eh64->e_flags;
    DP("ELF Flags: 0x%x\n", Flags);
  } else {
    // This function reads e_flags, so the diagnostic names that field rather
    // than the machine ID.
    DP("Unable to get e_flags from ELF file!\n");
  }

  // Single release point for the handle on both paths.
  elf_end(e);
  return Flags;
}
} // namespace

int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
Expand Down Expand Up @@ -676,9 +705,20 @@ int32_t __tgt_rtl_init_device(int device_id) {
DeviceInfo.ComputeUnits[device_id] = compute_units;
DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
}

char GetInfoName[64]; // 64 max size returned by get info
err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
(void *) GetInfoName);
if (err)
DeviceInfo.GPUName[device_id] = "--unknown gpu--";
else {
DeviceInfo.GPUName[device_id] = GetInfoName;
}

if (print_kernel_trace == 4)
fprintf(stderr, "Device#%-2d CU's: %2d\n", device_id,
DeviceInfo.ComputeUnits[device_id]);
fprintf(stderr, "Device#%-2d CU's: %2d %s\n", device_id,
DeviceInfo.ComputeUnits[device_id],
DeviceInfo.GPUName[device_id].c_str());

// Query attributes to determine number of threads/block and blocks/grid.
uint16_t workgroup_max_dim[3];
Expand Down Expand Up @@ -1038,22 +1078,18 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
return ATMI_STATUS_SUCCESS;
};

atmi_status_t err;
{
err = module_register_from_memory_to_place(
atmi_status_t err = module_register_from_memory_to_place(
(void *)image->ImageStart, img_size, get_gpu_place(device_id),
on_deserialized_data);

check("Module registering", err);
if (err != ATMI_STATUS_SUCCESS) {
char GPUName[64] = "--unknown gpu--";
hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
(void)hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
(void *)GPUName);
fprintf(stderr,
"Possible gpu arch mismatch: %s, please check"
" compiler: -march=<gpu> flag\n",
GPUName);
"Possible gpu arch mismatch: device:%s, image:%s please check"
" compiler flag: -march=<gpu>\n",
DeviceInfo.GPUName[device_id].c_str(),
get_elf_mach_gfx_name(elf_e_flags(image)));
return NULL;
}
}
Expand Down Expand Up @@ -1149,8 +1185,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
void *varptr;
uint32_t varsize;

err = atmi_interop_hsa_get_symbol_info(get_gpu_mem_place(device_id),
e->name, &varptr, &varsize);
atmi_status_t err = atmi_interop_hsa_get_symbol_info(
get_gpu_mem_place(device_id), e->name, &varptr, &varsize);

if (err != ATMI_STATUS_SUCCESS) {
DP("Loading global '%s' (Failed)\n", e->name);
Expand Down Expand Up @@ -1192,7 +1228,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,

atmi_mem_place_t place = get_gpu_mem_place(device_id);
uint32_t kernarg_segment_size;
err = atmi_interop_hsa_get_kernel_info(
atmi_status_t err = atmi_interop_hsa_get_kernel_info(
place, e->name, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
&kernarg_segment_size);

Expand Down

0 comments on commit cab9f69

Please sign in to comment.