Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[AMDGPU] Add FeatureFlatAtomicFaddF32Inst
Feature used by targets that have the flat_atomic_add_f32 instruction
(gfx940 and gfx11). Remove isGFX940GFX11Plus.
Add a hasFlatAtomicFaddF32Inst Subtarget check for codegen.

Differential Revision: https://reviews.llvm.org/D134532
  • Loading branch information
petar-avramovic committed Sep 23, 2022
1 parent 42ef572 commit e03d36d
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 9 deletions.
17 changes: 12 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Expand Up @@ -622,6 +622,13 @@ def FeatureAtomicPkFaddNoRtnInsts
[FeatureFlatGlobalInsts]
>;

// Subtarget feature "flat-atomic-fadd-f32-inst": when enabled, sets the
// subtarget attribute HasFlatAtomicFaddF32Inst to "true". It marks targets
// that provide the flat_atomic_add_f32 instruction.
def FeatureFlatAtomicFaddF32Inst
: SubtargetFeature<"flat-atomic-fadd-f32-inst",
"HasFlatAtomicFaddF32Inst",
"true",
"Has flat_atomic_add_f32 instruction"
>;

def FeatureSupportsSRAMECC : SubtargetFeature<"sramecc-support",
"SupportsSRAMECC",
"true",
Expand Down Expand Up @@ -1142,6 +1149,7 @@ def FeatureISAVersion9_4_0 : FeatureSet<
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
FeatureAtomicPkFaddNoRtnInsts,
FeatureFlatAtomicFaddF32Inst,
FeatureSupportsSRAMECC,
FeaturePackedTID,
FeatureArchitectedFlatScratch,
Expand Down Expand Up @@ -1281,6 +1289,7 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureArchitectedFlatScratch,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
FeatureFlatAtomicFaddF32Inst,
FeatureImageInsts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
Expand Down Expand Up @@ -1510,11 +1519,6 @@ def isGFX940Plus :
Predicate<"Subtarget->hasGFX940Insts()">,
AssemblerPredicate<(all_of FeatureGFX940Insts)>;

def isGFX940GFX11Plus :
Predicate<"Subtarget->hasGFX940Insts() ||"
"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">,
AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;

def isGFX8GFX9NotGFX940 :
Predicate<"!Subtarget->hasGFX940Insts() &&"
"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
Expand Down Expand Up @@ -1751,6 +1755,9 @@ def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">,
def HasAtomicPkFaddNoRtnInsts
: Predicate<"Subtarget->hasAtomicPkFaddNoRtnInsts()">,
AssemblerPredicate<(all_of FeatureAtomicPkFaddNoRtnInsts)>;
// Predicate for flat_atomic_add_f32 availability. Codegen evaluates
// Subtarget->hasFlatAtomicFaddF32Inst(); the assembler side requires
// FeatureFlatAtomicFaddF32Inst.
def HasFlatAtomicFaddF32Inst
: Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">,
AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>;

// Predicate tied to FeatureDsSrc2Insts on the assembler side.
// NOTE(review): the codegen predicate string is negated
// ("!Subtarget->hasDsSrc2Insts()") while the assembler predicate requires the
// feature to be present. Confirm that this asymmetry is intentional.
def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">,
AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;
Expand Down
11 changes: 7 additions & 4 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Expand Up @@ -728,9 +728,9 @@ defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
} // End SubtargetPredicate = isGFX7GFX10GFX11

// GFX940-, GFX11-only flat instructions.
let SubtargetPredicate = isGFX940GFX11Plus in {
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = isGFX940GFX11Plus
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst

defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
Expand Down Expand Up @@ -1476,10 +1476,13 @@ defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_f
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
}

// Selection patterns for FLAT_ATOMIC_ADD_F32, gated on
// HasFlatAtomicFaddF32Inst: one for the atomic_load_fadd_flat node and one
// for the int_amdgcn_flat_atomic_fadd intrinsic, both on f32.
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>;
}

let OtherPredicates = [isGFX940Plus] in {
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_flat", v2f16>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Expand Up @@ -150,6 +150,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
bool HasAtomicPkFaddNoRtnInsts = false;
bool HasFlatAtomicFaddF32Inst = false;
bool SupportsSRAMECC = false;

// This should not be used directly. 'TargetID' tracks the dynamic settings
Expand Down Expand Up @@ -746,6 +747,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }

// Returns the HasFlatAtomicFaddF32Inst subtarget flag.
bool hasFlatAtomicFaddF32Inst() const {
  return HasFlatAtomicFaddF32Inst;
}

// Returns the HasNoSdstCMPX subtarget flag.
bool hasNoSdstCMPX() const { return HasNoSdstCMPX; }
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
Expand Up @@ -5,6 +5,7 @@ declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1
declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float)
declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float*, float)

; GCN-LABEL: {{^}}buffer_atomic_add_f32:
; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen
Expand Down Expand Up @@ -99,4 +100,12 @@ define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspac
ret void
}

; Uses attribute group #1 (gfx803 with +flat-atomic-fadd-f32-inst forced on)
; and expects the flat fadd intrinsic call to be selected to
; flat_atomic_add_f32. The intrinsic result %ret is unused.
; GCN-LABEL: {{^}}flat_atomic_fadd_f32_wrong_subtarget:
; GCN: flat_atomic_add_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(float* %ptr, float %data) #1 {
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data)
ret void
}

attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"}
attributes #1 = { "target-cpu"="gfx803" "target-features"="+flat-atomic-fadd-f32-inst"}

0 comments on commit e03d36d

Please sign in to comment.