diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index b178623a319d0..63044b08f4830 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -899,6 +899,18 @@ def FeatureArchitectedSGPRs : SubtargetFeature<"architected-sgprs", "Enable the architected SGPRs" >; +def FeatureGDS : SubtargetFeature<"gds", + "HasGDS", + "true", + "Has Global Data Share" +>; + +def FeatureGWS : SubtargetFeature<"gws", + "HasGWS", + "true", + "Has Global Wave Sync" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -917,7 +929,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel, - FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts + FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts, + FeatureGDS, FeatureGWS ] >; @@ -928,7 +941,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess, - FeatureImageInsts + FeatureImageInsts, FeatureGDS, FeatureGWS ] >; @@ -943,7 +956,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32, - FeatureUnalignedBufferAccess, FeatureImageInsts + FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS ] >; @@ -961,7 +974,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureScalarFlatScratchInsts, 
FeatureScalarAtomics, FeatureR128A16, FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK, FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, - FeatureNegativeScratchOffsetBug + FeatureNegativeScratchOffsetBug, FeatureGWS ] >; @@ -980,7 +993,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16, - FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts + FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts, + FeatureGDS, FeatureGWS ] >; @@ -999,7 +1013,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11", FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureA16, FeatureFastDenormalF32, FeatureG16, - FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess + FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS, + FeatureGWS ] >; @@ -1104,28 +1119,32 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet< def FeatureISAVersion9_0_0 : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureMadMixInsts, + [FeatureGDS, + FeatureMadMixInsts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureImageGather4D16Bug])>; def FeatureISAVersion9_0_2 : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureMadMixInsts, + [FeatureGDS, + FeatureMadMixInsts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureImageGather4D16Bug])>; def FeatureISAVersion9_0_4 : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureDsSrc2Insts, + [FeatureGDS, + FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFmaMixInsts, FeatureImageGather4D16Bug])>; def FeatureISAVersion9_0_6 : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, - [HalfRate64Ops, + [FeatureGDS, + HalfRate64Ops, FeatureFmaMixInsts, 
FeatureDsSrc2Insts, FeatureExtendedImageInsts, @@ -1139,7 +1158,8 @@ def FeatureISAVersion9_0_6 : FeatureSet< def FeatureISAVersion9_0_8 : FeatureSet< !listconcat(FeatureISAVersion9_0_MI_Common.Features, - [HalfRate64Ops, + [FeatureGDS, + HalfRate64Ops, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureAtomicBufferGlobalPkAddF16NoRtnInsts, @@ -1148,7 +1168,8 @@ def FeatureISAVersion9_0_8 : FeatureSet< def FeatureISAVersion9_0_9 : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureMadMixInsts, + [FeatureGDS, + FeatureMadMixInsts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureImageInsts, @@ -1168,7 +1189,8 @@ def FeatureISAVersion9_0_A : FeatureSet< def FeatureISAVersion9_0_C : FeatureSet< !listconcat(FeatureISAVersion9_0_Common.Features, - [FeatureMadMixInsts, + [FeatureGDS, + FeatureMadMixInsts, FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureImageGather4D16Bug])>; @@ -1836,6 +1858,10 @@ def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">; def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">; +def HasGDS : Predicate<"Subtarget->hasGDS()">; + +def HasGWS : Predicate<"Subtarget->hasGWS()">; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 825c6f0acd0f2..73820375a1e6e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2369,8 +2369,9 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) { } void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { - if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all && - !Subtarget->hasGWSSemaReleaseAll()) { + if (!Subtarget->hasGWS() || + (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all && + !Subtarget->hasGWSSemaReleaseAll())) { // Let this error. 
SelectCode(N); return; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 6a55c6242da6c..e9df189239661 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1581,8 +1581,8 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) { bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const { - if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all && - !STI.hasGWSSemaReleaseAll()) + if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all && + !STI.hasGWSSemaReleaseAll())) return false; // intrinsic ID, vsrc, offset diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 85a3f763cd5a2..90a656ad50b46 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -969,8 +969,10 @@ multiclass DSAtomicRetPat_mc { !cast(frag#"_local_"#vt.Size)>; } - def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), - /* complexity */ 0, /* gds */ 1>; + let OtherPredicates = [HasGDS] in { + def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + } } multiclass DSAtomicRetNoRetPat_mc(frag#"_local_noret_"#vt.Size), /* complexity */ 1>; } - def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), - /* complexity */ 0, /* gds */ 1>; - def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt.Size), - /* complexity */ 1, /* gds */ 1>; + let OtherPredicates = [HasGDS] in { + def : DSAtomicRetPat(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; + } } @@ -1024,10 +1028,12 @@ multiclass DSAtomicCmpXChgSwapped_mc; } - def : DSAtomicCmpXChgSwapped(frag#"_region_m0_"#vt.Size), - /* complexity */ 0, /* gds */ 1>; - def : DSAtomicCmpXChgSwapped(frag#"_region_m0_noret_"#vt.Size), - /* complexity */ 
1, /* gds */ 1>; + let OtherPredicates = [HasGDS] in { + def : DSAtomicCmpXChgSwapped(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicCmpXChgSwapped(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; + } } } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 @@ -1047,10 +1053,12 @@ multiclass DSAtomicCmpXChg_mc(!cast(noRetInst)#"_gfx9"), vt, !cast(frag#"_local_noret_"#vt.Size), /* complexity */ 1>; - def : DSAtomicCmpXChg(frag#"_region_m0_"#vt.Size), - /* complexity */ 0, /* gds */ 1>; - def : DSAtomicCmpXChg(frag#"_region_m0_noret_"#vt.Size), - /* complexity */ 1, /* gds */ 1>; + let OtherPredicates = [HasGDS] in { + def : DSAtomicCmpXChg(frag#"_region_m0_"#vt.Size), + /* complexity */ 0, /* gds */ 1>; + def : DSAtomicCmpXChg(frag#"_region_m0_noret_"#vt.Size), + /* complexity */ 1, /* gds */ 1>; + } } } // End SubtargetPredicate = isGFX11Plus diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index b9c9358f88b9d..b4ae3a7a08e4e 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -9,11 +9,11 @@ // The code produced for "generic" is only useful for tests and cannot // reasonably be expected to execute on any particular target. 
def : ProcessorModel<"generic", NoSchedModel, - [FeatureWavefrontSize64] + [FeatureWavefrontSize64, FeatureGDS, FeatureGWS] >; def : ProcessorModel<"generic-hsa", NoSchedModel, - [FeatureWavefrontSize64, FeatureFlatAddressSpace] + [FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace] >; //===------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index ef5470df876d0..e1a4a1e76f84a 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -180,6 +180,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasArchitectedFlatScratch = false; bool EnableFlatScratch = false; bool HasArchitectedSGPRs = false; + bool HasGDS = false; + bool HasGWS = false; bool AddNoCarryInsts = false; bool HasUnpackedD16VMem = false; bool LDSMisalignedBug = false; @@ -1155,6 +1157,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// \returns true if the architected SGPRs are enabled. bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; } + /// \returns true if Global Data Share is supported. + bool hasGDS() const { return HasGDS; } + + /// \returns true if Global Wave Sync is supported. 
+ bool hasGWS() const { return HasGWS; } + /// \returns true if the machine has merged shaders in which s0-s7 are /// reserved by the hardware and user SGPRs start at s8 bool hasMergedShaders() const { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 7bc8a24cebcd0..0857e841bf829 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1414,7 +1414,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, if (AS == AMDGPUAS::PRIVATE_ADDRESS) return isLegalMUBUFAddressingMode(AM); - if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { + if (AS == AMDGPUAS::LOCAL_ADDRESS || + (AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) { // Basic, single offset DS instructions allow a 16-bit unsigned immediate // field. // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have diff --git a/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll b/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll new file mode 100644 index 0000000000000..7ed21baaf2439 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll @@ -0,0 +1,10 @@ +; RUN: not --crash llc -march=amdgcn -mcpu=gfx90a < %s 2>&1 | FileCheck %s + +; GDS is not supported on GFX90A +; CHECK: LLVM ERROR: Cannot select: {{.*}} AtomicLoadAdd + +define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 { + %val = atomicrmw volatile add ptr addrspace(2) %gds, i32 5 acq_rel + store i32 %val, ptr addrspace(1) %out + ret void +}