diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 12b7f05c44630..17b6e0cb9c3b4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16075,40 +16075,37 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
     return AtomicExpansionKind::CmpXChg;
   }
 
-  if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
+  if (!AMDGPU::isFlatGlobalAddrSpace(AS) &&
+      AS != AMDGPUAS::BUFFER_FAT_POINTER)
     return AtomicExpansionKind::CmpXChg;
 
-  if ((AMDGPU::isFlatGlobalAddrSpace(AS) ||
-       AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
-      Subtarget->hasAtomicFaddNoRtnInsts()) {
-    if (Subtarget->hasGFX940Insts())
-      return AtomicExpansionKind::None;
+  // TODO: gfx940 supports v2f16 and v2bf16
+  if (Subtarget->hasGFX940Insts() && (Ty->isFloatTy() || Ty->isDoubleTy()))
+    return AtomicExpansionKind::None;
 
-    if (unsafeFPAtomicsDisabled(RMW->getFunction()))
-      return AtomicExpansionKind::CmpXChg;
+  if (unsafeFPAtomicsDisabled(RMW->getFunction()))
+    return AtomicExpansionKind::CmpXChg;
 
-    // Always expand system scope fp atomics.
-    if (HasSystemScope)
-      return AtomicExpansionKind::CmpXChg;
+  // Always expand system scope fp atomics.
+  if (HasSystemScope)
+    return AtomicExpansionKind::CmpXChg;
 
-    if ((AMDGPU::isExtendedGlobalAddrSpace(AS) ||
-         AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
-        Ty->isFloatTy()) {
-      // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
-      if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
-      // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
-      if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
-    }
+  // global and flat atomic fadd f64: gfx90a, gfx940.
+  if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+    return ReportUnsafeHWInst(AtomicExpansionKind::None);
 
-    // flat atomic fadd f32: gfx940, gfx11+.
-    if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
-        Subtarget->hasFlatAtomicFaddF32Inst())
+  if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
+    // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
+    if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
+      return ReportUnsafeHWInst(AtomicExpansionKind::None);
+    // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
+    if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
       return ReportUnsafeHWInst(AtomicExpansionKind::None);
+  }
 
-    // global and flat atomic fadd f64: gfx90a, gfx940.
-    if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts())
+  // flat atomic fadd f32: gfx940, gfx11+.
+  if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
+    if (Subtarget->hasFlatAtomicFaddF32Inst())
       return ReportUnsafeHWInst(AtomicExpansionKind::None);
 
     // If it is in flat address space, and the type is float, we will try to
@@ -16116,15 +16113,12 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
     // reason we need that is, in the expansion, we emit the check of address
     // space. If it is in global address space, we emit the global atomic
     // fadd; if it is in shared address space, we emit the LDS atomic fadd.
-    if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
-        Subtarget->hasLDSFPAtomicAddF32()) {
+    if (Subtarget->hasLDSFPAtomicAddF32()) {
       if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
         return AtomicExpansionKind::Expand;
       if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
         return AtomicExpansionKind::Expand;
     }
-
-    return AtomicExpansionKind::CmpXChg;
   }
 
   return AtomicExpansionKind::CmpXChg;
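Illustrative note, not part of the patch: a minimal IR sketch of the flat f32 case that this hook routes to AtomicExpansionKind::Expand, assuming a subtarget with hasLDSFPAtomicAddF32() (so the expansion can emit the address-space check described in the comment above) and assuming the "amdgpu-unsafe-fp-atomics"="true" function attribute is what keeps unsafeFPAtomicsDisabled() from forcing CmpXChg. The function name is hypothetical.

  ; agent scope avoids the HasSystemScope early-out above;
  ; ptr is address space 0, i.e. flat on AMDGPU
  define float @flat_fadd_f32(ptr %p, float %v) #0 {
    %r = atomicrmw fadd ptr %p, float %v syncscope("agent") monotonic
    ret float %r
  }
  attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" }

Per the comment retained in the second hunk, the expansion then tests the address at runtime and issues a global atomic fadd or an LDS atomic fadd accordingly.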