[LoongArch] Add codegen support for atomicrmw min/max operation on LA64
This patch is required by OpenMP; with it applied, the OpenMP regression
tests pass. To keep the patch at a reviewable size, atomicrmw min/max
operations on LA32 will be added in a later patch.

Differential Revision: https://reviews.llvm.org/D138177
gonglingqin committed Nov 30, 2022
1 parent 7c215a4 commit 5f9b4d8
Showing 5 changed files with 287 additions and 2 deletions.
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -41,6 +41,8 @@ defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics;
defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics;

// @llvm.loongarch.masked.cmpxchg.i64.<p>(
// ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
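// Reader's note (inferred, not part of this patch): by analogy with the
// cmpxchg comment above and the call built in emitMaskedAtomicRMWIntrinsic
// below, the new five-operand min/max intrinsics take the form
// @llvm.loongarch.masked.atomicrmw.{max,min}.i64.<p>(
//   ptr addr, grlen incr, grlen mask, grlen shiftamt, grlenimm ordering)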
33 changes: 33 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -133,6 +133,12 @@ bool LoongArchExpandAtomicPseudo::expandMI(
return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
case LoongArch::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
case LoongArch::PseudoMaskedAtomicLoadMax32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
NextMBBI);
case LoongArch::PseudoMaskedAtomicLoadMin32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
NextMBBI);
}
return false;
}
@@ -341,6 +347,17 @@ bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
return true;
}

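// Sign-extend the sub-word value held in ValReg in place: shift it left and
// then arithmetically right by the amount in ShamtReg, using the 32-bit
// sll.w/sra.w instructions, so the masked field can be compared as a signed
// value by the min/max expansion below.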
static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
MachineBasicBlock *MBB, Register ValReg,
Register ShamtReg) {
BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
.addReg(ValReg)
.addReg(ShamtReg);
BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
.addReg(ValReg)
.addReg(ShamtReg);
}

bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
@@ -417,6 +434,22 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
.addReg(Scratch2Reg)
.addMBB(LoopTailMBB);
break;
case AtomicRMWInst::Max:
insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
// bge scratch2, incr, .looptail
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
.addReg(Scratch2Reg)
.addReg(IncrReg)
.addMBB(LoopTailMBB);
break;
case AtomicRMWInst::Min:
insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
// bge incr, scratch2, .looptail
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
.addReg(IncrReg)
.addReg(Scratch2Reg)
.addMBB(LoopTailMBB);
break;
// TODO: support other AtomicRMWInst.
}

24 changes: 22 additions & 2 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2325,6 +2325,10 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
case AtomicRMWInst::UMin:
return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
case AtomicRMWInst::Max:
return Intrinsic::loongarch_masked_atomicrmw_max_i64;
case AtomicRMWInst::Min:
return Intrinsic::loongarch_masked_atomicrmw_min_i64;
// TODO: support other AtomicRMWInst.
}
}
@@ -2396,8 +2400,24 @@ Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(

Value *Result;

Result =
Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
// Must pass the shift amount needed to sign extend the loaded value prior
// to performing a signed comparison for min/max. ShiftAmt is the number of
// bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
// is the number of bits to left+right shift the value in order to
// sign-extend.
if (AI->getOperation() == AtomicRMWInst::Min ||
AI->getOperation() == AtomicRMWInst::Max) {
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned ValWidth =
DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
Value *SextShamt =
Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
Result = Builder.CreateCall(LlwOpScwLoop,
{AlignedAddr, Incr, Mask, SextShamt, Ordering});
} else {
Result =
Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
}

if (GRLen == 64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
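As a concrete illustration of the comment above (a standalone sketch, not part
of the patch): for an i8 on LA64, GRLen is 64 and ValWidth is 8, so SextShamt
is 56, 48, 40 or 32 depending on the byte's offset within its aligned 32-bit
word. The generated i8 tests below compute the same value with
"andi $a4, $a0, 24" followed by "xori $a4, $a4, 56", which works because
ShiftAmt's set bits are always a subset of 56's. The variable names mirror the
patch; the loop and asserts exist only for illustration.

#include <cassert>

int main() {
  const unsigned GRLen = 64, ValWidth = 8;
  // k: byte offset of the i8 within its naturally aligned 32-bit word.
  for (unsigned k = 0; k < 4; ++k) {
    unsigned ShiftAmt = 8 * k;                        // bit position of the field
    unsigned SextShamt = GRLen - ValWidth - ShiftAmt; // 56, 48, 40, 32
    // ShiftAmt's set bits are a subset of 56's, so the subtraction can be
    // folded into an xor, which is what the emitted i8 code does.
    assert(SextShamt == (ShiftAmt ^ 56));
    // sll.w/sra.w only use the low 5 bits of the shift amount, so the
    // effective 32-bit shift pair moves the byte to the top of the word and
    // back, sign-extending it for the signed compare.
    assert(SextShamt % 32 == 32 - ValWidth - ShiftAmt);
  }
  return 0;
}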
34 changes: 34 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1338,6 +1338,20 @@ class PseudoMaskedAMUMinUMax
def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;

class PseudoMaskedAMMinMax
: Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
(ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$sextshamt,
grlenimm:$ordering)> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
"@earlyclobber $scratch2";
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
}

def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax;
def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;

/// Compare and exchange

class PseudoCmpXchg
@@ -1362,6 +1376,12 @@ def PseudoMaskedCmpXchg32
let hasSideEffects = 0;
}

class PseudoMaskedAMMinMaxPat<Intrinsic intrin, Pseudo AMInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering),
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;

class AtomicPat<Intrinsic intrin, Pseudo AMInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1410,6 +1430,15 @@ def : Pat<(atomic_load_umax_32 GPR:$rj, GPR:$rk),
def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk),
(AMMAX_DB_DU GPR:$rk, GPR:$rj)>;

def : Pat<(atomic_load_min_32 GPR:$rj, GPR:$rk),
(AMMIN_DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_min_64 GPR:$rj, GPR:$rk),
(AMMIN_DB_D GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_max_32 GPR:$rj, GPR:$rk),
(AMMAX_DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_max_64 GPR:$rj, GPR:$rk),
(AMMAX_DB_D GPR:$rk, GPR:$rj)>;

def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
PseudoMaskedAtomicLoadUMax32>;
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
@@ -1423,6 +1452,11 @@ def : Pat<(int_loongarch_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
(PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;

def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
PseudoMaskedAtomicLoadMax32>;
def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i64,
PseudoMaskedAtomicLoadMin32>;
} // Predicates = [IsLA64]

defm : PseudoBinPat<"atomic_load_nand_32", PseudoAtomicLoadNand32>;
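Note on the native-width patterns above (not from the patch itself): the
AM*_DB opcodes are the barrier ("_db") variants of LoongArch's atomic
memory-access instructions, so the 32- and 64-bit min/max cases lower to a
single instruction that is strong enough for the acquire ordering exercised in
the tests below.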
196 changes: 196 additions & 0 deletions llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -181,3 +181,199 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
%1 = atomicrmw umin ptr %a, i64 %b acquire
ret i64 %1
}

define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-LABEL: atomicrmw_max_i8_acquire:
; LA64: # %bb.0:
; LA64-NEXT: addi.w $a2, $zero, -4
; LA64-NEXT: and $a2, $a0, $a2
; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
; LA64-NEXT: sll.w $a3, $a3, $a0
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: ext.w.b $a1, $a1
; LA64-NEXT: sll.w $a1, $a1, $a0
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: andi $a4, $a0, 24
; LA64-NEXT: xori $a4, $a4, 56
; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
; LA64-NEXT: ll.w $a5, $a2, 0
; LA64-NEXT: and $a7, $a5, $a3
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: sll.w $a7, $a7, $a4
; LA64-NEXT: sra.w $a7, $a7, $a4
; LA64-NEXT: bge $a7, $a1, .LBB8_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1
; LA64-NEXT: xor $a6, $a5, $a1
; LA64-NEXT: and $a6, $a6, $a3
; LA64-NEXT: xor $a6, $a5, $a6
; LA64-NEXT: .LBB8_3: # in Loop: Header=BB8_1 Depth=1
; LA64-NEXT: sc.w $a6, $a2, 0
; LA64-NEXT: beqz $a6, .LBB8_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
; LA64-NEXT: srl.w $a0, $a5, $a0
; LA64-NEXT: ret
%1 = atomicrmw max ptr %a, i8 %b acquire
ret i8 %1
}

define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-LABEL: atomicrmw_max_i16_acquire:
; LA64: # %bb.0:
; LA64-NEXT: addi.w $a2, $zero, -4
; LA64-NEXT: and $a2, $a0, $a2
; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: andi $a3, $a0, 24
; LA64-NEXT: ori $a4, $zero, 48
; LA64-NEXT: sub.d $a3, $a4, $a3
; LA64-NEXT: lu12i.w $a4, 15
; LA64-NEXT: ori $a4, $a4, 4095
; LA64-NEXT: sll.w $a4, $a4, $a0
; LA64-NEXT: addi.w $a4, $a4, 0
; LA64-NEXT: ext.w.h $a1, $a1
; LA64-NEXT: sll.w $a1, $a1, $a0
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
; LA64-NEXT: ll.w $a5, $a2, 0
; LA64-NEXT: and $a7, $a5, $a4
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: sll.w $a7, $a7, $a3
; LA64-NEXT: sra.w $a7, $a7, $a3
; LA64-NEXT: bge $a7, $a1, .LBB9_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1
; LA64-NEXT: xor $a6, $a5, $a1
; LA64-NEXT: and $a6, $a6, $a4
; LA64-NEXT: xor $a6, $a5, $a6
; LA64-NEXT: .LBB9_3: # in Loop: Header=BB9_1 Depth=1
; LA64-NEXT: sc.w $a6, $a2, 0
; LA64-NEXT: beqz $a6, .LBB9_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
; LA64-NEXT: srl.w $a0, $a5, $a0
; LA64-NEXT: ret
%1 = atomicrmw max ptr %a, i16 %b acquire
ret i16 %1
}

define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
; LA64-LABEL: atomicrmw_max_i32_acquire:
; LA64: # %bb.0:
; LA64-NEXT: ammax_db.w $a2, $a1, $a0
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
%1 = atomicrmw max ptr %a, i32 %b acquire
ret i32 %1
}

define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; LA64-LABEL: atomicrmw_max_i64_acquire:
; LA64: # %bb.0:
; LA64-NEXT: ammax_db.d $a2, $a1, $a0
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
%1 = atomicrmw max ptr %a, i64 %b acquire
ret i64 %1
}

define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-LABEL: atomicrmw_min_i8_acquire:
; LA64: # %bb.0:
; LA64-NEXT: addi.w $a2, $zero, -4
; LA64-NEXT: and $a2, $a0, $a2
; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
; LA64-NEXT: sll.w $a3, $a3, $a0
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: ext.w.b $a1, $a1
; LA64-NEXT: sll.w $a1, $a1, $a0
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: andi $a4, $a0, 24
; LA64-NEXT: xori $a4, $a4, 56
; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
; LA64-NEXT: ll.w $a5, $a2, 0
; LA64-NEXT: and $a7, $a5, $a3
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: sll.w $a7, $a7, $a4
; LA64-NEXT: sra.w $a7, $a7, $a4
; LA64-NEXT: bge $a1, $a7, .LBB12_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
; LA64-NEXT: xor $a6, $a5, $a1
; LA64-NEXT: and $a6, $a6, $a3
; LA64-NEXT: xor $a6, $a5, $a6
; LA64-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1
; LA64-NEXT: sc.w $a6, $a2, 0
; LA64-NEXT: beqz $a6, .LBB12_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
; LA64-NEXT: srl.w $a0, $a5, $a0
; LA64-NEXT: ret
%1 = atomicrmw min ptr %a, i8 %b acquire
ret i8 %1
}

define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-LABEL: atomicrmw_min_i16_acquire:
; LA64: # %bb.0:
; LA64-NEXT: addi.w $a2, $zero, -4
; LA64-NEXT: and $a2, $a0, $a2
; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: andi $a3, $a0, 24
; LA64-NEXT: ori $a4, $zero, 48
; LA64-NEXT: sub.d $a3, $a4, $a3
; LA64-NEXT: lu12i.w $a4, 15
; LA64-NEXT: ori $a4, $a4, 4095
; LA64-NEXT: sll.w $a4, $a4, $a0
; LA64-NEXT: addi.w $a4, $a4, 0
; LA64-NEXT: ext.w.h $a1, $a1
; LA64-NEXT: sll.w $a1, $a1, $a0
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
; LA64-NEXT: ll.w $a5, $a2, 0
; LA64-NEXT: and $a7, $a5, $a4
; LA64-NEXT: move $a6, $a5
; LA64-NEXT: sll.w $a7, $a7, $a3
; LA64-NEXT: sra.w $a7, $a7, $a3
; LA64-NEXT: bge $a1, $a7, .LBB13_3
; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
; LA64-NEXT: xor $a6, $a5, $a1
; LA64-NEXT: and $a6, $a6, $a4
; LA64-NEXT: xor $a6, $a5, $a6
; LA64-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1
; LA64-NEXT: sc.w $a6, $a2, 0
; LA64-NEXT: beqz $a6, .LBB13_1
; LA64-NEXT: # %bb.4:
; LA64-NEXT: dbar 1792
; LA64-NEXT: # %bb.5:
; LA64-NEXT: srl.w $a0, $a5, $a0
; LA64-NEXT: ret
%1 = atomicrmw min ptr %a, i16 %b acquire
ret i16 %1
}

define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
; LA64-LABEL: atomicrmw_min_i32_acquire:
; LA64: # %bb.0:
; LA64-NEXT: ammin_db.w $a2, $a1, $a0
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
%1 = atomicrmw min ptr %a, i32 %b acquire
ret i32 %1
}

define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; LA64-LABEL: atomicrmw_min_i64_acquire:
; LA64: # %bb.0:
; LA64-NEXT: ammin_db.d $a2, $a1, $a0
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
%1 = atomicrmw min ptr %a, i64 %b acquire
ret i64 %1
}
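The LA64 check lines above match the style of LLVM's autogenerated test
assertions; presumably they were produced with
llvm/utils/update_llc_test_checks.py and can be regenerated the same way if
the expansion changes.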
