Skip to content

Commit

Permalink
[AMDGPU] Add target features for GDS and GWS
Browse files Browse the repository at this point in the history
GFX9 subtargets from GFX90A onwards lack GDS but still have GWS.

Differential Revision: https://reviews.llvm.org/D156713
  • Loading branch information
jayfoad committed Aug 2, 2023
1 parent 8f973d5 commit c2093b8
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 36 deletions.
52 changes: 39 additions & 13 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,18 @@ def FeatureArchitectedSGPRs : SubtargetFeature<"architected-sgprs",
"Enable the architected SGPRs"
>;

// Subtarget feature "gds": sets the subtarget's HasGDS field to "true".
// GFX9 subtargets from GFX90A onwards lack GDS, so this feature is added per
// ISA version for GFX9 rather than through the common GFX9 generation list.
def FeatureGDS : SubtargetFeature<"gds",
"HasGDS",
"true",
"Has Global Data Share"
>;

// Subtarget feature "gws": sets the subtarget's HasGWS field to "true".
// Unlike GDS, GWS is still present on GFX90A and later GFX9 subtargets.
def FeatureGWS : SubtargetFeature<"gws",
"HasGWS",
"true",
"Has Global Wave Sync"
>;

// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
Expand All @@ -917,7 +929,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
FeatureGDS, FeatureGWS
]
>;

Expand All @@ -928,7 +941,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
FeatureImageInsts
FeatureImageInsts, FeatureGDS, FeatureGWS
]
>;

Expand All @@ -943,7 +956,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
FeatureUnalignedBufferAccess, FeatureImageInsts
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS
]
>;

Expand All @@ -961,7 +974,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
FeatureNegativeScratchOffsetBug
FeatureNegativeScratchOffsetBug, FeatureGWS
]
>;

Expand All @@ -980,7 +993,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
FeatureGDS, FeatureGWS
]
>;

Expand All @@ -999,7 +1013,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureA16, FeatureFastDenormalF32, FeatureG16,
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
FeatureGWS
]
>;

Expand Down Expand Up @@ -1104,28 +1119,32 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<

def FeatureISAVersion9_0_0 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureMadMixInsts,
[FeatureGDS,
FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;

def FeatureISAVersion9_0_2 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureMadMixInsts,
[FeatureGDS,
FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;

def FeatureISAVersion9_0_4 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureDsSrc2Insts,
[FeatureGDS,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureFmaMixInsts,
FeatureImageGather4D16Bug])>;

def FeatureISAVersion9_0_6 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[HalfRate64Ops,
[FeatureGDS,
HalfRate64Ops,
FeatureFmaMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
Expand All @@ -1139,7 +1158,8 @@ def FeatureISAVersion9_0_6 : FeatureSet<

def FeatureISAVersion9_0_8 : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
[HalfRate64Ops,
[FeatureGDS,
HalfRate64Ops,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
Expand All @@ -1148,7 +1168,8 @@ def FeatureISAVersion9_0_8 : FeatureSet<

def FeatureISAVersion9_0_9 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureMadMixInsts,
[FeatureGDS,
FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageInsts,
Expand All @@ -1168,7 +1189,8 @@ def FeatureISAVersion9_0_A : FeatureSet<

def FeatureISAVersion9_0_C : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureMadMixInsts,
[FeatureGDS,
FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;
Expand Down Expand Up @@ -1836,6 +1858,10 @@ def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;

def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;

// Predicate that holds when the subtarget reports Global Data Share support
// (Subtarget->hasGDS()); used to guard GDS-only patterns.
def HasGDS : Predicate<"Subtarget->hasGDS()">;

// Predicate that holds when the subtarget reports Global Wave Sync support
// (Subtarget->hasGWS()).
def HasGWS : Predicate<"Subtarget->hasGWS()">;

// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2369,8 +2369,9 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
}

void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
!Subtarget->hasGWSSemaReleaseAll()) {
if (!Subtarget->hasGWS() ||
(IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
!Subtarget->hasGWSSemaReleaseAll())) {
// Let this error.
SelectCode(N);
return;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1581,8 +1581,8 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {

bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
Intrinsic::ID IID) const {
if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
!STI.hasGWSSemaReleaseAll())
if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
!STI.hasGWSSemaReleaseAll()))
return false;

// intrinsic ID, vsrc, offset
Expand Down
40 changes: 24 additions & 16 deletions llvm/lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -969,8 +969,10 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
}

def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
let OtherPredicates = [HasGDS] in {
def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
}
}

multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
Expand All @@ -989,12 +991,14 @@ multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
}

def : DSAtomicRetPat<inst, vt,
!cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicRetPat<noRetInst, vt,
!cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
let OtherPredicates = [HasGDS] in {
def : DSAtomicRetPat<inst, vt,
!cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicRetPat<noRetInst, vt,
!cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
}
}


Expand Down Expand Up @@ -1024,10 +1028,12 @@ multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueT
/* complexity */ 1>;
}

def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
let OtherPredicates = [HasGDS] in {
def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
}
}
} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10

Expand All @@ -1047,10 +1053,12 @@ multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;

def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
let OtherPredicates = [HasGDS] in {
def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
/* complexity */ 0, /* gds */ 1>;
def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
/* complexity */ 1, /* gds */ 1>;
}
}
} // End SubtargetPredicate = isGFX11Plus

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/GCNProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
// The code produced for "generic" is only useful for tests and cannot
// reasonably be expected to execute on any particular target.
def : ProcessorModel<"generic", NoSchedModel,
[FeatureWavefrontSize64]
[FeatureWavefrontSize64, FeatureGDS, FeatureGWS]
>;

def : ProcessorModel<"generic-hsa", NoSchedModel,
[FeatureWavefrontSize64, FeatureFlatAddressSpace]
[FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
>;

//===------------------------------------------------------------===//
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
bool HasArchitectedSGPRs = false;
bool HasGDS = false;
bool HasGWS = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
Expand Down Expand Up @@ -1155,6 +1157,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the architected SGPRs are enabled.
bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; }

/// \returns true if Global Data Share is supported.
bool hasGDS() const { return HasGDS; }

/// \returns true if Global Wave Sync is supported.
bool hasGWS() const { return HasGWS; }

/// \returns true if the machine has merged shaders in which s0-s7 are
/// reserved by the hardware and user SGPRs start at s8
bool hasMergedShaders() const {
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return isLegalMUBUFAddressingMode(AM);

if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
if (AS == AMDGPUAS::LOCAL_ADDRESS ||
(AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/CodeGen/AMDGPU/gds-unsupported.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
; RUN: not --crash llc -march=amdgcn -mcpu=gfx90a < %s 2>&1 | FileCheck %s

; GDS is not supported on GFX90A+
; CHECK: LLVM ERROR: Cannot select: {{.*}} AtomicLoadAdd

; Performs a returning atomic add of 5 through an addrspace(2) pointer and
; stores the old value to %out. The CHECK line above expects instruction
; selection to fail on the AtomicLoadAdd node for the CPU given in the RUN line.
define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
%val = atomicrmw volatile add ptr addrspace(2) %gds, i32 5 acq_rel
store i32 %val, ptr addrspace(1) %out
ret void
}

0 comments on commit c2093b8

Please sign in to comment.