diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index ebb257ab33821..cfae75d093421 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -362,6 +362,7 @@ enum { ELFOSABI_FENIXOS = 16, // FenixOS ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture. + ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture. ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime @@ -385,6 +386,12 @@ enum { ELFABIVERSION_AMDGPU_HSA_V6 = 4, }; +// CUDA OS ABI Version identification. +enum { + ELFABIVERSION_CUDA_V1 = 7, + ELFABIVERSION_CUDA_V2 = 8, +}; + #define ELF_RELOC(name, value) name = value, // X86_64 relocations. @@ -921,9 +928,15 @@ enum { // NVPTX specific e_flags. enum : unsigned { - // Processor selection mask for EF_CUDA_SM* values. + // Processor selection mask for EF_CUDA_SM* values prior to Blackwell. EF_CUDA_SM = 0xff, + // Processor selection mask for EF_CUDA_SM* values following Blackwell. + EF_CUDA_SM_MASK = 0xff00, + + // Bit offset of the EF_CUDA_SM* value within EF_CUDA_SM_MASK. + EF_CUDA_SM_OFFSET = 8, + // SM based processor values. EF_CUDA_SM20 = 0x14, EF_CUDA_SM21 = 0x15, @@ -943,9 +956,15 @@ enum : unsigned { EF_CUDA_SM80 = 0x50, EF_CUDA_SM86 = 0x56, EF_CUDA_SM87 = 0x57, + EF_CUDA_SM88 = 0x58, EF_CUDA_SM89 = 0x59, - // The sm_90a variant uses the same machine flag. EF_CUDA_SM90 = 0x5a, + EF_CUDA_SM100 = 0x64, + EF_CUDA_SM101 = 0x65, + EF_CUDA_SM103 = 0x67, + EF_CUDA_SM110 = 0x6e, + EF_CUDA_SM120 = 0x78, + EF_CUDA_SM121 = 0x79, // Unified texture binding is enabled. EF_CUDA_TEXMODE_UNIFIED = 0x100, @@ -954,12 +973,15 @@ enum : unsigned { // The target is using 64-bit addressing. EF_CUDA_64BIT_ADDRESS = 0x400, // Set when using the sm_90a processor. 
- EF_CUDA_ACCELERATORS = 0x800, + EF_CUDA_ACCELERATORS_V1 = 0x800, // Undocumented software feature. EF_CUDA_SW_FLAG_V2 = 0x1000, // Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values. EF_CUDA_VIRTUAL_SM = 0xff0000, + + // Set when using an accelerator variant like sm_100a. + EF_CUDA_ACCELERATORS = 0x8, }; // ELF Relocation types for BPF diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 103686884e705..30a9dd35f624e 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1479,6 +1479,7 @@ template Triple::OSType ELFObjectFile::getOS() const { case ELF::ELFOSABI_OPENBSD: return Triple::OpenBSD; case ELF::ELFOSABI_CUDA: + case ELF::ELFOSABI_CUDA_V2: return Triple::CUDA; case ELF::ELFOSABI_AMDGPU_HSA: return Triple::AMDHSA; diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 0e13d32bbe522..a6b56ae77cf21 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -620,7 +620,15 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const { StringRef ELFObjectFileBase::getNVPTXCPUName() const { assert(getEMachine() == ELF::EM_CUDA); - unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM; + const bool IsV1 = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1; + const unsigned Flags = getPlatformFlags(); + unsigned SM = IsV1 ? Flags & ELF::EF_CUDA_SM + : (Flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET; + // The accelerator bit differs between ABI versions, so select it by ABI + // version rather than by SM value. + const unsigned AccelBit = + IsV1 ? ELF::EF_CUDA_ACCELERATORS_V1 : ELF::EF_CUDA_ACCELERATORS; + const bool IsAccelerated = (Flags & AccelBit) != 0; switch (SM) { // Fermi architecture. @@ -672,6 +680,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const { return "sm_86"; case ELF::EF_CUDA_SM87: return "sm_87"; + case ELF::EF_CUDA_SM88: + return "sm_88"; // Ada architecture. case ELF::EF_CUDA_SM89: @@ -679,7 +689,23 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const { // Hopper architecture. case ELF::EF_CUDA_SM90: - return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90"; + return IsAccelerated ? 
"sm_90a" : "sm_90"; + + // Blackwell architecture. + case ELF::EF_CUDA_SM100: + return IsAccelerated ? "sm_100a" : "sm_100"; + case ELF::EF_CUDA_SM101: + return IsAccelerated ? "sm_101a" : "sm_101"; + case ELF::EF_CUDA_SM103: + return IsAccelerated ? "sm_103a" : "sm_103"; + case ELF::EF_CUDA_SM110: + return IsAccelerated ? "sm_110a" : "sm_110"; + + // Blackwell architecture. + case ELF::EF_CUDA_SM120: + return IsAccelerated ? "sm_120a" : "sm_120"; + case ELF::EF_CUDA_SM121: + return IsAccelerated ? "sm_121a" : "sm_121"; default: llvm_unreachable("Unknown EF_CUDA_SM value"); } diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 101079f09e1d2..3fd167df1ecc5 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1083,26 +1083,26 @@ const EnumEntry ElfObjectFileType[] = { }; const EnumEntry ElfOSABI[] = { - {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE}, - {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX}, - {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD}, - {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX}, - {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD}, - {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS}, - {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX}, - {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX}, - {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD}, - {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64}, - {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO}, - {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD}, - {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS}, - {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK}, - {"AROS", "AROS", ELF::ELFOSABI_AROS}, - {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS}, - {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI}, - {"CUDA", 
"NVIDIA - CUDA", ELF::ELFOSABI_CUDA}, - {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE} -}; + {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE}, + {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX}, + {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD}, + {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX}, + {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD}, + {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS}, + {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX}, + {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX}, + {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD}, + {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64}, + {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO}, + {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD}, + {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS}, + {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK}, + {"AROS", "AROS", ELF::ELFOSABI_AROS}, + {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS}, + {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI}, + {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA}, + {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2}, + {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}}; const EnumEntry AMDGPUElfOSABI[] = { {"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA}, @@ -1666,16 +1666,62 @@ const EnumEntry ElfHeaderAMDGPUFlagsABIVersion4[] = { }; const EnumEntry ElfHeaderNVPTXFlags[] = { - ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"), - ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"), - ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"), - ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"), - ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"), - ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"), - ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"), - ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"), - ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"), - ENUM_ENT(EF_CUDA_SM89, 
"sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"), + // ELFABIVERSION_CUDA_V1 encoding: SM value in the low byte (EF_CUDA_SM). + ENUM_ENT(EF_CUDA_SM20, "sm_20"), + ENUM_ENT(EF_CUDA_SM21, "sm_21"), + ENUM_ENT(EF_CUDA_SM30, "sm_30"), + ENUM_ENT(EF_CUDA_SM32, "sm_32"), + ENUM_ENT(EF_CUDA_SM35, "sm_35"), + ENUM_ENT(EF_CUDA_SM37, "sm_37"), + ENUM_ENT(EF_CUDA_SM50, "sm_50"), + ENUM_ENT(EF_CUDA_SM52, "sm_52"), + ENUM_ENT(EF_CUDA_SM53, "sm_53"), + ENUM_ENT(EF_CUDA_SM60, "sm_60"), + ENUM_ENT(EF_CUDA_SM61, "sm_61"), + ENUM_ENT(EF_CUDA_SM62, "sm_62"), + ENUM_ENT(EF_CUDA_SM70, "sm_70"), + ENUM_ENT(EF_CUDA_SM72, "sm_72"), + ENUM_ENT(EF_CUDA_SM75, "sm_75"), + ENUM_ENT(EF_CUDA_SM80, "sm_80"), + ENUM_ENT(EF_CUDA_SM86, "sm_86"), + ENUM_ENT(EF_CUDA_SM87, "sm_87"), + ENUM_ENT(EF_CUDA_SM88, "sm_88"), + ENUM_ENT(EF_CUDA_SM89, "sm_89"), + ENUM_ENT(EF_CUDA_SM90, "sm_90"), + ENUM_ENT(EF_CUDA_SM100, "sm_100"), + ENUM_ENT(EF_CUDA_SM101, "sm_101"), + ENUM_ENT(EF_CUDA_SM103, "sm_103"), + ENUM_ENT(EF_CUDA_SM110, "sm_110"), + ENUM_ENT(EF_CUDA_SM120, "sm_120"), + ENUM_ENT(EF_CUDA_SM121, "sm_121"), + // Other ABI versions: SM value shifted left by EF_CUDA_SM_OFFSET. + ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"), + ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"), + ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"), + ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"), + ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"), + ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"), + ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"), + ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"), + ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"), + ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"), + ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"), + ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"), + ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"), + ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"), + ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"), + ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"), + ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"), + ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, 
"sm_87"), + ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"), + ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"), + ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"), + ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"), + ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"), + ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"), + ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"), + ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"), + ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"), }; const EnumEntry ElfHeaderRISCVFlags[] = { @@ -3650,10 +3696,22 @@ template void GNUELFDumper::printFileHeaders() { else if (e.e_machine == EM_XTENSA) ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags), unsigned(ELF::EF_XTENSA_MACH)); - else if (e.e_machine == EM_CUDA) - ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags), - unsigned(ELF::EF_CUDA_SM)); - else if (e.e_machine == EM_AMDGPU) { + else if (e.e_machine == EM_CUDA) { + // Each CUDA ABI version keeps the SM value in a different e_flags field. + // Mask e_flags down to that field so that table entries written for the + // other encoding can never match stray bits. + const bool IsCudaV1 = + e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1; + const unsigned SMMask = IsCudaV1 ? unsigned(ELF::EF_CUDA_SM) + : unsigned(ELF::EF_CUDA_SM_MASK); + ElfFlags = printFlags(e.e_flags & SMMask, ArrayRef(ElfHeaderNVPTXFlags), + SMMask); + if (IsCudaV1 && (e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1)) + ElfFlags += "a"; + else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 && + (e.e_flags & ELF::EF_CUDA_ACCELERATORS)) + ElfFlags += "a"; + } else if (e.e_machine == EM_AMDGPU) { switch (e.e_ident[ELF::EI_ABIVERSION]) { default: break; diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp index dfec55432f202..b0ee1984c42ce 100644 --- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp +++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp @@ -60,23 +60,30 @@ static Expected checkMachineImpl(const object::ELFObjectFile &ELFObj, uint16_t EMachine) { const auto Header = ELFObj.getELFFile().getHeader(); if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN) - return createError("Only executable ELF files are supported"); + return createError("only executable ELF files are supported"); if (Header.e_machine == EM_AMDGPU) { 
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA) - return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA"); + return createError("invalid AMD OS/ABI, must be AMDGPU_HSA"); if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 && Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6) - return createError("Invalid AMD ABI version, must be version 5 or above"); + return createError("invalid AMD ABI version, must be version 5 or above"); if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 || (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC) - return createError("Unsupported AMDGPU architecture"); + return createError("unsupported AMDGPU architecture"); } else if (Header.e_machine == EM_CUDA) { - if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS) - return createError("Invalid CUDA addressing mode"); - if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35) - return createError("Unsupported NVPTX architecture"); + if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) { + if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS) + return createError("invalid CUDA addressing mode"); + if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35) + return createError("unsupported NVPTX architecture"); + } else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) { + if (((Header.e_flags & EF_CUDA_SM_MASK) >> EF_CUDA_SM_OFFSET) < EF_CUDA_SM100) + return createError("unsupported NVPTX architecture"); + } else { + return createError("invalid CUDA ABI version"); + } } return Header.e_machine == EMachine; diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index b787376eb1770..71a28fadfd81d 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy { return ElfOrErr.takeError(); // Get the numeric value for the image's `sm_` value. 
- auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM; + const auto Header = ElfOrErr->getELFFile().getHeader(); + unsigned SM = + Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 + ? Header.e_flags & ELF::EF_CUDA_SM + : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET; CUdevice Device; CUresult Res = cuDeviceGet(&Device, DeviceId);