diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 72e8b59e0a409..052b231d62a3e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -22,6 +22,7 @@ #include "AMDKernelCodeT.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600AsmPrinter.h" #include "SIMachineFunctionInfo.h" @@ -428,38 +429,43 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( return KernelCodeProperties; } -amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor( - const MachineFunction &MF, - const SIProgramInfo &PI) const { +MCKernelDescriptor +AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF, + const SIProgramInfo &PI) const { const GCNSubtarget &STM = MF.getSubtarget(); const Function &F = MF.getFunction(); const SIMachineFunctionInfo *Info = MF.getInfo(); + MCContext &Ctx = MF.getContext(); - amdhsa::kernel_descriptor_t KernelDescriptor; - memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor)); + MCKernelDescriptor KernelDescriptor; assert(isUInt<32>(PI.ScratchSize)); assert(isUInt<32>(PI.getComputePGMRSrc1(STM))); assert(isUInt<32>(PI.getComputePGMRSrc2())); - KernelDescriptor.group_segment_fixed_size = PI.LDSSize; - KernelDescriptor.private_segment_fixed_size = PI.ScratchSize; + KernelDescriptor.group_segment_fixed_size = + MCConstantExpr::create(PI.LDSSize, Ctx); + KernelDescriptor.private_segment_fixed_size = + MCConstantExpr::create(PI.ScratchSize, Ctx); Align MaxKernArgAlign; - KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign); + KernelDescriptor.kernarg_size = MCConstantExpr::create( + STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx); - KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM); - KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(); - 
KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF); + KernelDescriptor.compute_pgm_rsrc1 = + MCConstantExpr::create(PI.getComputePGMRSrc1(STM), Ctx); + KernelDescriptor.compute_pgm_rsrc2 = + MCConstantExpr::create(PI.getComputePGMRSrc2(), Ctx); + KernelDescriptor.kernel_code_properties = + MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx); assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0); - if (STM.hasGFX90AInsts()) - KernelDescriptor.compute_pgm_rsrc3 = - CurrentProgramInfo.ComputePGMRSrc3GFX90A; + KernelDescriptor.compute_pgm_rsrc3 = MCConstantExpr::create( + STM.hasGFX90AInsts() ? CurrentProgramInfo.ComputePGMRSrc3GFX90A : 0, Ctx); - if (AMDGPU::hasKernargPreload(STM)) - KernelDescriptor.kernarg_preload = - static_cast(Info->getNumKernargPreloadedSGPRs()); + KernelDescriptor.kernarg_preload = MCConstantExpr::create( + AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0, + Ctx); return KernelDescriptor; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 79326cd3d3289..b8b2718d293e6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -28,15 +28,12 @@ class MCCodeEmitter; class MCOperand; namespace AMDGPU { +struct MCKernelDescriptor; namespace HSAMD { class MetadataStreamer; } } // namespace AMDGPU -namespace amdhsa { -struct kernel_descriptor_t; -} - class AMDGPUAsmPrinter final : public AsmPrinter { private: unsigned CodeObjectVersion; @@ -75,9 +72,9 @@ class AMDGPUAsmPrinter final : public AsmPrinter { uint16_t getAmdhsaKernelCodeProperties( const MachineFunction &MF) const; - amdhsa::kernel_descriptor_t getAmdhsaKernelDescriptor( - const MachineFunction &MF, - const SIProgramInfo &PI) const; + AMDGPU::MCKernelDescriptor + getAmdhsaKernelDescriptor(const MachineFunction &MF, + const SIProgramInfo &PI) const; void initTargetStreamer(Module &M); diff --git 
a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 529705479646f..38850f5acadd1 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -8,6 +8,7 @@ #include "AMDKernelCodeT.h" #include "MCTargetDesc/AMDGPUMCExpr.h" +#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIDefines.h" @@ -5417,7 +5418,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (getParser().parseIdentifier(KernelName)) return true; - kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); + AMDGPU::MCKernelDescriptor KD = + getDefaultAmdhsaKernelDescriptor(&getSTI(), getContext()); StringSet<> Seen; @@ -5457,89 +5459,111 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { return TokError(".amdhsa_ directives cannot be repeated"); SMLoc ValStart = getLoc(); - int64_t IVal; - if (getParser().parseAbsoluteExpression(IVal)) + const MCExpr *ExprVal; + if (getParser().parseExpression(ExprVal)) return true; SMLoc ValEnd = getLoc(); SMRange ValRange = SMRange(ValStart, ValEnd); - if (IVal < 0) - return OutOfRangeError(ValRange); - + int64_t IVal = 0; uint64_t Val = IVal; + bool EvaluatableExpr; + if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) { + if (IVal < 0) + return OutOfRangeError(ValRange); + Val = IVal; + } #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ - if (!isUInt(VALUE)) \ + if (!isUInt(Val)) \ return OutOfRangeError(RANGE); \ - AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); + AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ + getContext()); + +// Some fields use the parsed value immediately which requires the expression to +// be solvable. 
+#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ + if (!(RESOLVED)) \ + return Error(IDRange.Start, "directive should have resolvable expression", \ + IDRange); if (ID == ".amdhsa_group_segment_fixed_size") { - if (!isUInt(Val)) + if (!isUInt(Val)) return OutOfRangeError(ValRange); - KD.group_segment_fixed_size = Val; + KD.group_segment_fixed_size = ExprVal; } else if (ID == ".amdhsa_private_segment_fixed_size") { - if (!isUInt(Val)) + if (!isUInt(Val)) return OutOfRangeError(ValRange); - KD.private_segment_fixed_size = Val; + KD.private_segment_fixed_size = ExprVal; } else if (ID == ".amdhsa_kernarg_size") { - if (!isUInt(Val)) + if (!isUInt(Val)) return OutOfRangeError(ValRange); - KD.kernarg_size = Val; + KD.kernarg_size = ExprVal; } else if (ID == ".amdhsa_user_sgpr_count") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); ExplicitUserSGPRCount = Val; } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (hasArchitectedFlatScratch()) return Error(IDRange.Start, "directive is not supported with architected flat scratch", IDRange); PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, - Val, ValRange); + ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 4; } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (!hasKernargPreload()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); if (Val > getMaxNumUserSGPRs()) return OutOfRangeError(ValRange); - PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val, + PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, ValRange); if (Val) { ImpliedUserSGPRCount += Val; PreloadLength = Val; } } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (!hasKernargPreload()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); if (Val >= 1024) return 
OutOfRangeError(ValRange); - PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val, + PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, ValRange); if (Val) PreloadOffset = Val; } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, - Val, ValRange); + ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; @@ -5548,34 +5572,39 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { return Error(IDRange.Start, "directive is not supported with architected flat scratch", IDRange); + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, - ValRange); + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, + ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 2; } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); PARSE_BITS_ENTRY(KD.kernel_code_properties, 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, - Val, ValRange); + ExprVal, ValRange); if (Val) ImpliedUserSGPRCount += 1; } else if (ID == ".amdhsa_wavefront_size32") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); EnableWavefrontSize32 = Val; PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, - Val, ValRange); + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, + ValRange); } else if (ID == ".amdhsa_uses_dynamic_stack") { PARSE_BITS_ENTRY(KD.kernel_code_properties, - KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); + KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, + ValRange); } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { if (hasArchitectedFlatScratch()) return Error(IDRange.Start, "directive is not supported with architected flat scratch", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); + COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, + ValRange); } else if (ID == ".amdhsa_enable_private_segment") { if (!hasArchitectedFlatScratch()) return Error( @@ -5583,42 +5612,48 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { "directive is not supported without architected flat scratch", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); + COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, + ValRange); } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, ValRange); } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal, ValRange); } else if (ID == 
".amdhsa_system_sgpr_workgroup_id_z") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, ValRange); } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, ValRange); } else if (ID == ".amdhsa_system_vgpr_workitem_id") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, ValRange); } else if (ID == ".amdhsa_next_free_vgpr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); VGPRRange = ValRange; NextFreeVGPR = Val; } else if (ID == ".amdhsa_next_free_sgpr") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); SGPRRange = ValRange; NextFreeSGPR = Val; } else if (ID == ".amdhsa_accum_offset") { if (!isGFX90A()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); AccumOffset = Val; } else if (ID == ".amdhsa_reserve_vcc") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (!isUInt<1>(Val)) return OutOfRangeError(ValRange); ReserveVCC = Val; } else if (ID == ".amdhsa_reserve_flat_scratch") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (IVersion.Major < 7) return Error(IDRange.Start, "directive requires gfx7+", IDRange); if (hasArchitectedFlatScratch()) @@ -5638,97 +5673,105 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { IDRange); } else if (ID == ".amdhsa_float_round_mode_32") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, + ValRange); } else if (ID == ".amdhsa_float_round_mode_16_64") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, + ValRange); } else if (ID == 
".amdhsa_float_denorm_mode_32") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, + ValRange); } else if (ID == ".amdhsa_float_denorm_mode_16_64") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, ValRange); } else if (ID == ".amdhsa_dx10_clamp") { if (IVersion.Major >= 12) return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val, + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, ValRange); } else if (ID == ".amdhsa_ieee_mode") { if (IVersion.Major >= 12) return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val, + COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, ValRange); } else if (ID == ".amdhsa_fp16_overflow") { if (IVersion.Major < 9) return Error(IDRange.Start, "directive requires gfx9+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val, + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, ValRange); } else if (ID == ".amdhsa_tg_split") { if (!isGFX90A()) return Error(IDRange.Start, "directive requires gfx90a+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, - ValRange); + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, + ExprVal, ValRange); } else if (ID == ".amdhsa_workgroup_processor_mode") { if (IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val, + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, ValRange); } else if (ID == 
".amdhsa_memory_ordered") { if (IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val, + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, ValRange); } else if (ID == ".amdhsa_forward_progress") { if (IVersion.Major < 10) return Error(IDRange.Start, "directive requires gfx10+", IDRange); - PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val, + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, + COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, ValRange); } else if (ID == ".amdhsa_shared_vgpr_count") { + EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); if (IVersion.Major < 10 || IVersion.Major >= 12) return Error(IDRange.Start, "directive requires gfx10 or gfx11", IDRange); SharedVGPRCount = Val; PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, - COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val, + COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, - ValRange); + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_denorm_src") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, - ValRange); + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, - Val, ValRange); + ExprVal, ValRange); } else if (ID == 
".amdhsa_exception_fp_ieee_underflow") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_exception_int_div_zero") { PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, - Val, ValRange); + ExprVal, ValRange); } else if (ID == ".amdhsa_round_robin_scheduling") { if (IVersion.Major < 12) return Error(IDRange.Start, "directive requires gfx12+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val, + COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, ValRange); } else { return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); @@ -5755,15 +5798,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (!isUInt( VGPRBlocks)) return OutOfRangeError(VGPRRange); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); + AMDGPU::MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()), + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT, + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext()); if (!isUInt( SGPRBlocks)) return OutOfRangeError(SGPRRange); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, - SGPRBlocks); + AMDGPU::MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()), + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT, + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext()); if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) return TokError("amdgpu_user_sgpr_count smaller than than implied by " @@ -5774,11 
+5820,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { if (!isUInt(UserSGPRCount)) return TokError("too many user SGPRs enabled"); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, - UserSGPRCount); - - if (PreloadLength && KD.kernarg_size && - (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size)) + AMDGPU::MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()), + COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, + COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext()); + + int64_t IVal = 0; + if (!KD.kernarg_size->evaluateAsAbsolute(IVal)) + return TokError("Kernarg size should be resolvable"); + uint64_t kernarg_size = IVal; + if (PreloadLength && kernarg_size && + (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size)) return TokError("Kernarg preload length + offset is larger than the " "kernarg segment size"); @@ -5790,8 +5842,11 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { "increments of 4"); if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) return TokError("accum_offset exceeds total VGPR allocation"); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, - (AccumOffset / 4 - 1)); + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc3, + MCConstantExpr::create(AccumOffset / 4 - 1, getContext()), + COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, + COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext()); } if (IVersion.Major >= 10 && IVersion.Major < 12) { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp new file mode 100644 index 0000000000000..0179d575464df --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.cpp @@ -0,0 +1,32 @@ +//===--- AMDGPUMCKernelDescriptor.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCKernelDescriptor.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" + +using namespace llvm; +using namespace llvm::AMDGPU; + +void MCKernelDescriptor::bits_set(const MCExpr *&Dst, const MCExpr *Value, + uint32_t Shift, uint32_t Mask, + MCContext &Ctx) { + auto Sft = MCConstantExpr::create(Shift, Ctx); + auto Msk = MCConstantExpr::create(Mask, Ctx); + Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx); + Dst = MCBinaryExpr::createOr(Dst, MCBinaryExpr::createShl(Value, Sft, Ctx), + Ctx); +} + +const MCExpr *MCKernelDescriptor::bits_get(const MCExpr *Src, uint32_t Shift, + uint32_t Mask, MCContext &Ctx) { + auto Sft = MCConstantExpr::create(Shift, Ctx); + auto Msk = MCConstantExpr::create(Mask, Ctx); + return MCBinaryExpr::createLShr(MCBinaryExpr::createAnd(Src, Msk, Ctx), Sft, + Ctx); +} diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h new file mode 100644 index 0000000000000..71659e642dd77 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelDescriptor.h @@ -0,0 +1,51 @@ +//===--- AMDGPUMCKernelDescriptor.h ---------------------------*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// AMDHSA kernel descriptor MCExpr struct for use in MC layer. Uses +/// AMDHSAKernelDescriptor.h for sizes and constants. 
+/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H + +#include "llvm/Support/AMDHSAKernelDescriptor.h" + +namespace llvm { +class MCExpr; +class MCContext; +namespace AMDGPU { + +struct MCKernelDescriptor { + const MCExpr *group_segment_fixed_size = nullptr; + const MCExpr *private_segment_fixed_size = nullptr; + const MCExpr *kernarg_size = nullptr; + const MCExpr *compute_pgm_rsrc3 = nullptr; + const MCExpr *compute_pgm_rsrc1 = nullptr; + const MCExpr *compute_pgm_rsrc2 = nullptr; + const MCExpr *kernel_code_properties = nullptr; + const MCExpr *kernarg_preload = nullptr; + + // MCExpr for: + // Dst = Dst & ~Mask + // Dst = Dst | (Value << Shift) + static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, + uint32_t Mask, MCContext &Ctx); + + // MCExpr for: + // return (Src & Mask) >> Shift + static const MCExpr *bits_get(const MCExpr *Src, uint32_t Shift, + uint32_t Mask, MCContext &Ctx); +}; + +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCKERNELDESCRIPTOR_H diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 4742b0b3e52ec..3006fcdb92823 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetStreamer.h" +#include "AMDGPUMCKernelDescriptor.h" #include "AMDGPUPTNote.h" #include "AMDKernelCodeT.h" #include "Utils/AMDGPUBaseInfo.h" @@ -307,94 +308,142 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, 
StringRef KernelName, - const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR, + const MCKernelDescriptor &KD, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) { IsaVersion IVersion = getIsaVersion(STI.getCPU()); + const MCAsmInfo *MAI = getContext().getAsmInfo(); OS << "\t.amdhsa_kernel " << KernelName << '\n'; -#define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \ - STREAM << "\t\t" << DIRECTIVE << " " \ - << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n'; - - OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size - << '\n'; - OS << "\t\t.amdhsa_private_segment_fixed_size " - << KD.private_segment_fixed_size << '\n'; - OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n'; - - PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT); + auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask, + StringRef Directive) { + int64_t IVal; + OS << "\t\t" << Directive << ' '; + const MCExpr *pgm_rsrc1_bits = + MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext()); + if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal)) + OS << static_cast(IVal); + else + pgm_rsrc1_bits->print(OS, MAI); + OS << '\n'; + }; + + OS << "\t\t.amdhsa_group_segment_fixed_size "; + KD.group_segment_fixed_size->print(OS, MAI); + OS << '\n'; + + OS << "\t\t.amdhsa_private_segment_fixed_size "; + KD.private_segment_fixed_size->print(OS, MAI); + OS << '\n'; + + OS << "\t\t.amdhsa_kernarg_size "; + KD.kernarg_size->print(OS, MAI); + OS << '\n'; + + PrintField( + KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count"); if (!hasArchitectedFlatScratch(STI)) - PRINT_FIELD( - OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); - PRINT_FIELD(OS, 
".amdhsa_user_sgpr_dispatch_ptr", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + PrintField( + KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, + ".amdhsa_user_sgpr_private_segment_buffer"); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, + ".amdhsa_user_sgpr_dispatch_ptr"); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, + ".amdhsa_user_sgpr_queue_ptr"); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, + ".amdhsa_user_sgpr_kernarg_segment_ptr"); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, + ".amdhsa_user_sgpr_dispatch_id"); if (!hasArchitectedFlatScratch(STI)) - PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, + ".amdhsa_user_sgpr_flat_scratch_init"); if 
(hasKernargPreload(STI)) { - PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD, - kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH); - PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD, - kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET); + PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT, + amdhsa::KERNARG_PRELOAD_SPEC_LENGTH, + ".amdhsa_user_sgpr_kernarg_preload_length"); + PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT, + amdhsa::KERNARG_PRELOAD_SPEC_OFFSET, + ".amdhsa_user_sgpr_kernarg_preload_offset"); } - PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + PrintField( + KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, + ".amdhsa_user_sgpr_private_segment_size"); if (IVersion.Major >= 10) - PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, - kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, + ".amdhsa_wavefront_size32"); if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) - PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); - PRINT_FIELD(OS, - (hasArchitectedFlatScratch(STI) - ? 
".amdhsa_enable_private_segment" - : ".amdhsa_system_sgpr_private_segment_wavefront_offset"), - KD, compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT); - PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); - PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); - PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); - PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); - PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); + PrintField(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, + ".amdhsa_uses_dynamic_stack"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, + (hasArchitectedFlatScratch(STI) + ? 
".amdhsa_enable_private_segment" + : ".amdhsa_system_sgpr_private_segment_wavefront_offset")); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, + ".amdhsa_system_sgpr_workgroup_id_x"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, + ".amdhsa_system_sgpr_workgroup_id_y"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, + ".amdhsa_system_sgpr_workgroup_id_z"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, + ".amdhsa_system_sgpr_workgroup_info"); + PrintField(KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, + ".amdhsa_system_vgpr_workitem_id"); // These directives are required. OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n'; OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n'; - if (AMDGPU::isGFX90A(STI)) - OS << "\t\t.amdhsa_accum_offset " << - (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4 - << '\n'; + if (AMDGPU::isGFX90A(STI)) { + // MCExpr equivalent of taking the (accum_offset + 1) * 4. 
+ const MCExpr *accum_bits = MCKernelDescriptor::bits_get( + KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext()); + accum_bits = MCBinaryExpr::createAdd( + accum_bits, MCConstantExpr::create(1, getContext()), getContext()); + accum_bits = MCBinaryExpr::createMul( + accum_bits, MCConstantExpr::create(4, getContext()), getContext()); + OS << "\t\t.amdhsa_accum_offset "; + int64_t IVal; + if (accum_bits->evaluateAsAbsolute(IVal)) { + OS << static_cast(IVal); + } else { + accum_bits->print(OS, MAI); + } + OS << '\n'; + } if (!ReserveVCC) OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n'; @@ -411,74 +460,105 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( break; } - PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); - PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); - PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); - PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, + ".amdhsa_float_round_mode_32"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, + ".amdhsa_float_round_mode_16_64"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, + ".amdhsa_float_denorm_mode_32"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, + 
".amdhsa_float_denorm_mode_16_64"); if (IVersion.Major < 12) { - PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP); - PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, + ".amdhsa_dx10_clamp"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, + ".amdhsa_ieee_mode"); + } + if (IVersion.Major >= 9) { + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, + ".amdhsa_fp16_overflow"); } - if (IVersion.Major >= 9) - PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL); if (AMDGPU::isGFX90A(STI)) - PRINT_FIELD(OS, ".amdhsa_tg_split", KD, - compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT); + PrintField(KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split"); if (IVersion.Major >= 10) { - PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE); - PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED); - PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, - compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, + ".amdhsa_workgroup_processor_mode"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT, + 
amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, + ".amdhsa_memory_ordered"); + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, + ".amdhsa_forward_progress"); } if (IVersion.Major >= 10 && IVersion.Major < 12) { - PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT); + PrintField(KD.compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, + ".amdhsa_shared_vgpr_count"); } - if (IVersion.Major >= 12) - PRINT_FIELD(OS, ".amdhsa_round_robin_scheduling", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN); - PRINT_FIELD( - OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); - PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); - PRINT_FIELD( - OS, ".amdhsa_exception_fp_ieee_div_zero", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); - PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); - PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); - PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); - PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD, - compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); -#undef PRINT_FIELD + if (IVersion.Major >= 12) { + PrintField(KD.compute_pgm_rsrc1, + amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT, + 
amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, + ".amdhsa_round_robin_scheduling"); + } + PrintField( + KD.compute_pgm_rsrc2, + amdhsa:: + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, + ".amdhsa_exception_fp_ieee_invalid_op"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, + ".amdhsa_exception_fp_denorm_src"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa:: + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, + ".amdhsa_exception_fp_ieee_div_zero"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, + ".amdhsa_exception_fp_ieee_overflow"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, + ".amdhsa_exception_fp_ieee_underflow"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, + ".amdhsa_exception_fp_ieee_inexact"); + PrintField( + KD.compute_pgm_rsrc2, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, + ".amdhsa_exception_int_div_zero"); OS << "\t.end_amdhsa_kernel\n"; } @@ -835,7 +915,7 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, + const MCKernelDescriptor 
&KernelDescriptor, uint64_t NextVGPR, uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) { auto &Streamer = getStreamer(); auto &Context = Streamer.getContext(); @@ -853,7 +933,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( // Kernel descriptor symbol's type and size are fixed. KernelDescriptorSymbol->setType(ELF::STT_OBJECT); KernelDescriptorSymbol->setSize( - MCConstantExpr::create(sizeof(KernelDescriptor), Context)); + MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context)); // The visibility of the kernel code symbol must be protected or less to allow // static relocations from the kernel descriptor to be used. @@ -861,31 +941,43 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED); Streamer.emitLabel(KernelDescriptorSymbol); - Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size); - Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size); - Streamer.emitInt32(KernelDescriptor.kernarg_size); - - for (uint8_t Res : KernelDescriptor.reserved0) - Streamer.emitInt8(Res); + Streamer.emitValue( + KernelDescriptor.group_segment_fixed_size, + sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size)); + Streamer.emitValue( + KernelDescriptor.private_segment_fixed_size, + sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size)); + Streamer.emitValue(KernelDescriptor.kernarg_size, + sizeof(amdhsa::kernel_descriptor_t::kernarg_size)); + + for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i) + Streamer.emitInt8(0u); // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The // expression being created is: // (start of kernel code) - (start of kernel descriptor) // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. 
- Streamer.emitValue(MCBinaryExpr::createSub( - MCSymbolRefExpr::create( - KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context), - MCSymbolRefExpr::create( - KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context), - Context), - sizeof(KernelDescriptor.kernel_code_entry_byte_offset)); - for (uint8_t Res : KernelDescriptor.reserved1) - Streamer.emitInt8(Res); - Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3); - Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1); - Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2); - Streamer.emitInt16(KernelDescriptor.kernel_code_properties); - Streamer.emitInt16(KernelDescriptor.kernarg_preload); - for (uint8_t Res : KernelDescriptor.reserved3) - Streamer.emitInt8(Res); + Streamer.emitValue( + MCBinaryExpr::createSub( + MCSymbolRefExpr::create(KernelCodeSymbol, + MCSymbolRefExpr::VK_AMDGPU_REL64, Context), + MCSymbolRefExpr::create(KernelDescriptorSymbol, + MCSymbolRefExpr::VK_None, Context), + Context), + sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset)); + for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i) + Streamer.emitInt8(0u); + Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3, + sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3)); + Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1, + sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1)); + Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2, + sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2)); + Streamer.emitValue( + KernelDescriptor.kernel_code_properties, + sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties)); + Streamer.emitValue(KernelDescriptor.kernarg_preload, + sizeof(amdhsa::kernel_descriptor_t::kernarg_preload)); + for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i) + Streamer.emitInt8(0u); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 
5aa80ff578c6b..706897a5dc1f4 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -22,15 +22,13 @@ class MCSymbol; class formatted_raw_ostream; namespace AMDGPU { + +struct MCKernelDescriptor; namespace HSAMD { struct Metadata; } } // namespace AMDGPU -namespace amdhsa { -struct kernel_descriptor_t; -} - class AMDGPUTargetStreamer : public MCTargetStreamer { AMDGPUPALMetadata PALMetadata; @@ -94,10 +92,11 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { return true; } - virtual void EmitAmdhsaKernelDescriptor( - const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {} + virtual void + EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, + const AMDGPU::MCKernelDescriptor &KernelDescriptor, + uint64_t NextVGPR, uint64_t NextSGPR, + bool ReserveVCC, bool ReserveFlatScr) {} static StringRef getArchNameFromElfMach(unsigned ElfMach); static unsigned getElfMach(StringRef GPU); @@ -150,10 +149,11 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool TrapEnabled) override; - void EmitAmdhsaKernelDescriptor( - const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; + void + EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, + const AMDGPU::MCKernelDescriptor &KernelDescriptor, + uint64_t NextVGPR, uint64_t NextSGPR, + bool ReserveVCC, bool ReserveFlatScr) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { @@ -205,10 +205,11 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { bool EmitKernargPreloadHeader(const MCSubtargetInfo &STI, bool 
TrapEnabled) override; - void EmitAmdhsaKernelDescriptor( - const MCSubtargetInfo &STI, StringRef KernelName, - const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR, - uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override; + void + EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, + const AMDGPU::MCKernelDescriptor &KernelDescriptor, + uint64_t NextVGPR, uint64_t NextSGPR, + bool ReserveVCC, bool ReserveFlatScr) override; }; } #endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt index 0842a58f794b3..14a02b6d8e368 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMAMDGPUDesc AMDGPUMCExpr.cpp AMDGPUMCTargetDesc.cpp AMDGPUTargetStreamer.cpp + AMDGPUMCKernelDescriptor.cpp R600InstPrinter.cpp R600MCCodeEmitter.cpp R600MCTargetDesc.cpp diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 6d53f68ace70d..4970055c4bdbf 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -10,6 +10,7 @@ #include "AMDGPU.h" #include "AMDGPUAsmUtils.h" #include "AMDKernelCodeT.h" +#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/ELF.h" @@ -20,6 +21,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -1215,44 +1217,64 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, } } -amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( - const MCSubtargetInfo *STI) { +MCKernelDescriptor 
getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, + MCContext &Ctx) { IsaVersion Version = getIsaVersion(STI->getCPU()); - amdhsa::kernel_descriptor_t KD; - memset(&KD, 0, sizeof(KD)); - - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, - amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE); - if (Version.Major >= 12) { - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0); - } else { - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1); + MCKernelDescriptor KD; + const MCExpr *ZeroMCExpr = MCConstantExpr::create(0, Ctx); + const MCExpr *OneMCExpr = MCConstantExpr::create(1, Ctx); + + KD.group_segment_fixed_size = ZeroMCExpr; + KD.private_segment_fixed_size = ZeroMCExpr; + KD.compute_pgm_rsrc1 = ZeroMCExpr; + KD.compute_pgm_rsrc2 = ZeroMCExpr; + KD.compute_pgm_rsrc3 = ZeroMCExpr; + KD.kernarg_size = ZeroMCExpr; + KD.kernel_code_properties = ZeroMCExpr; + KD.kernarg_preload = ZeroMCExpr; + + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, + MCConstantExpr::create(amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx), + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx); + if (Version.Major < 12) { + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Ctx); + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx); } - AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, - amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1); + 
MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc2, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT, + amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Ctx); if (Version.Major >= 10) { - AMDHSA_BITS_SET(KD.kernel_code_properties, - amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, - STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, - STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1); - AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1); - } - if (AMDGPU::isGFX90A(*STI)) { - AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, - STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0); + if (STI->getFeatureBits().test(FeatureWavefrontSize32)) + MCKernelDescriptor::bits_set( + KD.kernel_code_properties, OneMCExpr, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx); + if (!STI->getFeatureBits().test(FeatureCuMode)) + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Ctx); + + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc1, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT, + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx); } + if (AMDGPU::isGFX90A(*STI) && STI->getFeatureBits().test(FeatureTgSplit)) + MCKernelDescriptor::bits_set( + KD.compute_pgm_rsrc3, OneMCExpr, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx); return KD; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 29ac402d95351..32b73f1d868de 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -26,6 +26,7 @@ struct 
Align; class Argument; class Function; class GlobalValue; +class MCContext; class MCInstrInfo; class MCRegisterClass; class MCRegisterInfo; @@ -34,12 +35,9 @@ class StringRef; class Triple; class raw_ostream; -namespace amdhsa { -struct kernel_descriptor_t; -} - namespace AMDGPU { +struct MCKernelDescriptor; struct IsaVersion; /// Generic target versions emitted by this version of LLVM. @@ -852,8 +850,8 @@ unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI); -amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor( - const MCSubtargetInfo *STI); +MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, + MCContext &Ctx); bool isGroupSegment(const GlobalValue *GV); bool isGlobalSegment(const GlobalValue *GV); diff --git a/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s b/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s new file mode 100644 index 0000000000000..4623500987be8 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-amdgpu-exprs.s @@ -0,0 +1,27 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// OBJDUMP: 0000 00000000 0f000000 00000000 00000000 + +.text + +.p2align 8 +.type caller,@function +caller: + s_endpgm + +.rodata + +.p2align 6 +.amdhsa_kernel caller + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_private_segment_fixed_size max(7, callee1.private_seg_size, callee2.private_seg_size) +.end_amdhsa_kernel + +.set callee1.private_seg_size, 4 +.set callee2.private_seg_size, 15 + +// ASM: .amdhsa_private_segment_fixed_size max(7, callee1.private_seg_size, callee2.private_seg_size) diff --git a/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s b/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s new file mode 100644 index 
0000000000000..fab3e893352b2 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-expr-failure.s @@ -0,0 +1,281 @@ +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=ASM %s + +// Some directives currently require (immediately) solvable expressions, i.e., +// expressions that don't depend on yet-unknown symbolic values. + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type user_sgpr_count,@function +user_sgpr_count: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_count + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_count defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_count + +.p2align 8 +.type user_sgpr_private_segment_buffer,@function +user_sgpr_private_segment_buffer: + s_endpgm + +.amdhsa_kernel user_sgpr_private_segment_buffer + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_private_segment_buffer defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer + +.p2align 8 +.type user_sgpr_kernarg_preload_length,@function +user_sgpr_kernarg_preload_length: + s_endpgm + +.amdhsa_kernel user_sgpr_kernarg_preload_length + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_kernarg_preload_length defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length defined_boolean + +.p2align 8 +.type user_sgpr_kernarg_preload_offset,@function +user_sgpr_kernarg_preload_offset: + s_endpgm + +.amdhsa_kernel user_sgpr_kernarg_preload_offset + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_kernarg_preload_offset defined_boolean 
+.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset defined_boolean + +.p2align 8 +.type user_sgpr_dispatch_ptr,@function +user_sgpr_dispatch_ptr: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_dispatch_ptr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_dispatch_ptr defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr + +.p2align 8 +.type user_sgpr_queue_ptr,@function +user_sgpr_queue_ptr: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_queue_ptr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_queue_ptr defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr + +.p2align 8 +.type user_sgpr_kernarg_segment_ptr,@function +user_sgpr_kernarg_segment_ptr: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_kernarg_segment_ptr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_kernarg_segment_ptr defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr + +.p2align 8 +.type user_sgpr_dispatch_id,@function +user_sgpr_dispatch_id: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_dispatch_id + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_dispatch_id defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id + +.p2align 8 +.type user_sgpr_flat_scratch_init,@function +user_sgpr_flat_scratch_init: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_flat_scratch_init + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + 
.amdhsa_user_sgpr_flat_scratch_init defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init + +.p2align 8 +.type user_sgpr_private_segment_size,@function +user_sgpr_private_segment_size: + s_endpgm + +.p2align 6 +.amdhsa_kernel user_sgpr_private_segment_size + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_user_sgpr_private_segment_size defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size + +.p2align 8 +.type wavefront_size32,@function +wavefront_size32: + s_endpgm + +.p2align 6 +.amdhsa_kernel wavefront_size32 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_wavefront_size32 defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_wavefront_size32 + +.p2align 8 +.type next_free_vgpr,@function +next_free_vgpr: + s_endpgm + +.p2align 6 +.amdhsa_kernel next_free_vgpr + .amdhsa_next_free_vgpr defined_boolean + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_next_free_vgpr + +.p2align 8 +.type next_free_sgpr,@function +next_free_sgpr: + s_endpgm + +.p2align 6 +.amdhsa_kernel next_free_sgpr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr defined_boolean + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_next_free_sgpr + +.p2align 8 +.type accum_offset,@function +accum_offset: + s_endpgm + +.p2align 6 +.amdhsa_kernel accum_offset + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_accum_offset + +.p2align 8 +.type 
reserve_vcc,@function +reserve_vcc: + s_endpgm + +.p2align 6 +.amdhsa_kernel reserve_vcc + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_reserve_vcc defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_reserve_vcc + +.p2align 8 +.type reserve_flat_scratch,@function +reserve_flat_scratch: + s_endpgm + +.p2align 6 +.amdhsa_kernel reserve_flat_scratch + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_reserve_flat_scratch defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_reserve_flat_scratch + +.p2align 8 +.type shared_vgpr_count,@function +shared_vgpr_count: + s_endpgm + +.p2align 6 +.amdhsa_kernel shared_vgpr_count + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 + .amdhsa_shared_vgpr_count defined_boolean +.end_amdhsa_kernel + +// ASM: error: directive should have resolvable expression +// ASM-NEXT: .amdhsa_shared_vgpr_count + +.set defined_boolean, 1 + +// ASM: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s new file mode 100644 index 0000000000000..95af59c413ae6 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s @@ -0,0 +1,190 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). 
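+// When going from asm -> obj, the expressions should get resolved (through fixups). +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f0afe4 801f007f 000c0000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f0afe4 801f007f 000c0000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact 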
defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1)>>0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&32)>>5 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: 
.amdhsa_ieee_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: 
.amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 +// ASM-NEXT: .amdhsa_memory_ordered 1 +// ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s new file mode 100644 index 0000000000000..e1107fb69ba41 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s @@ -0,0 +1,186 @@ +// RUN: llvm-mc 
-triple amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). +// When going from asm -> obj, the expressions should get resolved (through fixups). + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f0afe4 811f007f 000c0000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f0afe4 811f007f 000c0000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + 
.amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_enable_private_segment defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_enable_private_segment defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later 
+// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 +// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: 
.amdhsa_ieee_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered 
(((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: 
.amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_enable_private_segment 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 +// ASM-NEXT: .amdhsa_memory_ordered 1 +// ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s new file mode 100644 index 0000000000000..449616d35186b --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s @@ -0,0 +1,184 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck 
--check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). +// When going from asm -> obj, the expressions should get resolved (through fixups). + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f02fe4 811f007f 000c0000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f02fe4 811f007f 000c0000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + 
.amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_round_robin_scheduling defined_boolean + .amdhsa_enable_private_segment defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_workgroup_processor_mode defined_boolean + .amdhsa_memory_ordered defined_boolean + .amdhsa_forward_progress defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_round_robin_scheduling defined_boolean + .amdhsa_enable_private_segment defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 
+.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10 +// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 +// 
ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 
(((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 
(((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&2147483648)>>31 +// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((((((((((0&(~786432))|(3<<18))&(~536870912))|(1<<29))&(~1073741824))|(1<<30))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: 
.amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero 
(((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_enable_private_segment 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 +// ASM-NEXT: .amdhsa_memory_ordered 1 +// ASM-NEXT: 
.amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_round_robin_scheduling 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s new file mode 100644 index 0000000000000..c7e05441b45ff --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s @@ -0,0 +1,168 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). 
+// When going from asm -> obj, the expressions should get resolved (through fixups). + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f0af00 801f007f 00080000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f0af00 801f007f 00080000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 
+.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 
(((0&(~2048))|(defined_boolean<<11))&1)>>0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp 
(((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// 
ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s new file mode 100644 index 0000000000000..49a5015987a65 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s @@ -0,0 +1,171 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefix=ASM %s + +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx801 -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). 
+// When going from asm -> obj, the expressions should get resolved (through fixups). + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 00f0af00 801f007f 00080000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 00f0af00 801f007f 00080000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_group_segment_fixed_size defined_value+2 + .amdhsa_private_segment_fixed_size defined_value+3 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel + +.set defined_value, 41 +.set defined_2_bits, 3 +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_group_segment_fixed_size defined_value+1 + .amdhsa_private_segment_fixed_size defined_value+2 + .amdhsa_system_vgpr_workitem_id defined_2_bits + .amdhsa_float_round_mode_32 defined_2_bits + .amdhsa_float_round_mode_16_64 defined_2_bits + .amdhsa_float_denorm_mode_32 defined_2_bits + .amdhsa_float_denorm_mode_16_64 defined_2_bits + .amdhsa_system_sgpr_workgroup_id_x defined_boolean + .amdhsa_system_sgpr_workgroup_id_y defined_boolean + .amdhsa_system_sgpr_workgroup_id_z defined_boolean + .amdhsa_system_sgpr_workgroup_info defined_boolean + .amdhsa_exception_fp_ieee_invalid_op defined_boolean + .amdhsa_exception_fp_denorm_src defined_boolean + .amdhsa_exception_fp_ieee_div_zero defined_boolean + .amdhsa_exception_fp_ieee_overflow defined_boolean + .amdhsa_exception_fp_ieee_underflow defined_boolean + .amdhsa_exception_fp_ieee_inexact defined_boolean + .amdhsa_exception_int_div_zero defined_boolean + .amdhsa_uses_dynamic_stack defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2 +// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 
(((0&(~2048))|(defined_boolean<<11))&1)>>0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp 
(((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~12288))|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_value, 41 +// ASM-NEXT: .no_dead_strip defined_value +// ASM-NEXT: .set defined_2_bits, 3 +// ASM-NEXT: .no_dead_strip defined_2_bits +// ASM-NEXT: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 42 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 43 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// 
ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 3 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 3 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 3 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s new file mode 100644 index 0000000000000..b7f89239160fc --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s @@ -0,0 +1,148 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// When going from asm -> asm, the expressions should remain the same (i.e., symbolic). 
+// When going from asm -> obj, the expressions should get resolved (through fixups). + +// OBJDUMP: Contents of section .rodata +// expr_defined_later +// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100 +// OBJDUMP-NEXT: 0030 0000ac04 81000000 00000000 00000000 +// expr_defined +// OBJDUMP-NEXT: 0040 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100 +// OBJDUMP-NEXT: 0070 0000ac04 81000000 00000000 00000000 + +.text +// ASM: .text + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type expr_defined_later,@function +expr_defined_later: + s_endpgm + +.p2align 8 +.type expr_defined,@function +expr_defined: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel expr_defined_later + .amdhsa_system_sgpr_private_segment_wavefront_offset defined_boolean + .amdhsa_dx10_clamp defined_boolean + .amdhsa_ieee_mode defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_tg_split defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +.set defined_boolean, 1 + +.p2align 6 +.amdhsa_kernel expr_defined + .amdhsa_system_sgpr_private_segment_wavefront_offset defined_boolean + .amdhsa_dx10_clamp defined_boolean + .amdhsa_ieee_mode defined_boolean + .amdhsa_fp16_overflow defined_boolean + .amdhsa_tg_split defined_boolean + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel expr_defined_later +// ASM-NEXT: .amdhsa_group_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1 +// ASM-NEXT:
.amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_accum_offset (((((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&63)>>0)+1)*4 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14 +// 
ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_tg_split (((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&65536)>>16 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&134217728)>>27 +// ASM-NEXT: 
.amdhsa_exception_fp_ieee_underflow (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1073741824)>>30 +// ASM-NEXT: .end_amdhsa_kernel + +// ASM: .set defined_boolean, 1 +// ASM-NEXT: .no_dead_strip defined_boolean + +// ASM: .amdhsa_kernel expr_defined +// ASM-NEXT: .amdhsa_group_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_accum_offset 4 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 0 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 0 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 0 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_tg_split 1 +// ASM-NEXT: 
.amdhsa_exception_fp_ieee_invalid_op 0 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +// ASM-NEXT: .amdhsa_exception_int_div_zero 0 +// ASM-NEXT: .end_amdhsa_kernel diff --git a/llvm/test/MC/AMDGPU/hsa-tg-split.s b/llvm/test/MC/AMDGPU/hsa-tg-split.s new file mode 100644 index 0000000000000..5a4d3e2c279c5 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-tg-split.s @@ -0,0 +1,74 @@ +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack,+tgsplit -filetype=obj < %s > %t +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// OBJDUMP: Contents of section .rodata +// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000100 +// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 + +.text +// ASM: .text + +.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" + +.amdhsa_code_object_version 4 +// ASM: .amdhsa_code_object_version 4 + +.p2align 8 +.type minimal,@function +minimal: + s_endpgm + +.rodata +// ASM: .rodata + +.p2align 6 +.amdhsa_kernel minimal + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_accum_offset 4 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel minimal +// ASM-NEXT: .amdhsa_group_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 0 +// ASM-NEXT: .amdhsa_kernarg_size 0 +// ASM-NEXT: .amdhsa_user_sgpr_count 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0 +// ASM-NEXT: 
.amdhsa_user_sgpr_kernarg_segment_ptr 0 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 0 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 0 +// ASM-NEXT: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM-NEXT: .amdhsa_accum_offset 4 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 1 +// ASM-NEXT: .amdhsa_float_round_mode_32 0 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 0 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 0 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 +// ASM-NEXT: .amdhsa_dx10_clamp 1 +// ASM-NEXT: .amdhsa_ieee_mode 1 +// ASM-NEXT: .amdhsa_fp16_overflow 0 +// ASM-NEXT: .amdhsa_tg_split 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 0 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 0 +// ASM-NEXT: .amdhsa_exception_int_div_zero 0 +// ASM-NEXT: .end_amdhsa_kernel