86 changes: 86 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -21,6 +21,8 @@
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"

@@ -137,6 +139,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,

setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

setMinCmpXchgSizeInBits(32);

// Function alignments.
const Align FunctionAlignment(4);
setMinFunctionAlignment(FunctionAlignment);
@@ -1779,3 +1783,85 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
// TODO: Support vectors.
return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
}

bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
default:
return false;
case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(4);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
// TODO: Add more Intrinsics later.
}
}

TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// TODO: Add more AtomicRMWInst that needs to be extended.
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
AtomicRMWInst::BinOp BinOp) {
if (GRLen == 64) {
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
// TODO: support other AtomicRMWInst.
}
}

if (GRLen == 32) {
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
// TODO: support other AtomicRMWInst.
}
}

llvm_unreachable("Unexpected GRLen\n");
}

Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
unsigned GRLen = Subtarget.getGRLen();
Value *Ordering =
Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
Type *Tys[] = {AlignedAddr->getType()};
Function *LlwOpScwLoop = Intrinsic::getDeclaration(
AI->getModule(),
getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);

if (GRLen == 64) {
Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
}

Value *Result;

Result =
Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});

if (GRLen == 64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}
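
Taken together, the two hooks above implement the masked-atomics scheme: shouldExpandAtomicRMWInIR tells AtomicExpandPass to rewrite i8/i16 atomicrmw operations in terms of the aligned 32-bit word that contains them, and emitMaskedAtomicRMWIntrinsic builds the call to the masked intrinsic that the patterns below select into PseudoMaskedAtomicSwap32. The hand-written LLVM IR below is a rough sketch of what such an expansion amounts to for an i8 acquire xchg on LA32; the value names, the .p0 mangling suffix, and the ordering constant are illustrative assumptions, not output copied from the pass.

; Sketch only: approximates the masked expansion of
;   %old = atomicrmw xchg ptr %a, i8 %b acquire
; on LA32 (GRLen == 32).
define i8 @xchg_i8_masked_sketch(ptr %a, i8 %b) {
  %addr = ptrtoint ptr %a to i32
  %aligned.int = and i32 %addr, -4                ; containing aligned 32-bit word
  %aligned = inttoptr i32 %aligned.int to ptr
  %byteoff = and i32 %addr, 3
  %shamt = shl i32 %byteoff, 3                    ; bit position of the byte in the word
  %mask = shl i32 255, %shamt                     ; 0xff placed over that byte
  %b.ext = zext i8 %b to i32
  %incr = shl i32 %b.ext, %shamt                  ; new value placed over that byte
  ; The ordering operand carries the integer value of AtomicOrdering (acquire assumed to be 4).
  %word = call i32 @llvm.loongarch.masked.atomicrmw.xchg.i32.p0(ptr %aligned, i32 %incr, i32 %mask, i32 4)
  %shifted = lshr i32 %word, %shamt
  %old = trunc i32 %shifted to i8
  ret i8 %old
}

declare i32 @llvm.loongarch.masked.atomicrmw.xchg.i32.p0(ptr, i32, i32, i32)

The intrinsic call is then matched by the AtomicPat in LoongArchInstrInfo.td, and the LL/SC loops in the test file at the end of this patch show the code the pseudo-instruction ultimately expands into.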
11 changes: 11 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -99,6 +99,17 @@ class LoongArchTargetLowering : public TargetLowering {
bool isCheapToSpeculateCttz(Type *Ty) const override;
bool isCheapToSpeculateCtlz(Type *Ty) const override;
bool hasAndNot(SDValue Y) const override;
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt,
AtomicOrdering Ord) const override;

bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;

private:
/// Target-specific function used to lower LoongArch calling conventions.
46 changes: 46 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -83,6 +83,9 @@ class UImmAsmOperand<int width, string suffix = "">
: ImmAsmOperand<"U", width, suffix> {
}

// A parameterized immediate operand, an alternative to i32imm/i64imm from Target.td.
def grlenimm : Operand<GRLenVT>;

def uimm2 : Operand<GRLenVT> {
let ParserMatchClass = UImmAsmOperand<2>;
}
@@ -1083,6 +1086,49 @@ defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i64>;
defm : StPat<atomic_store_unordered_monotonic_64, ST_D, GPR, i64>;
} // Predicates = [IsLA64]

/// Atomic Ops

class PseudoMaskedAM
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
}

def PseudoMaskedAtomicSwap32 : PseudoMaskedAM;

class PseudoAM : Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$incr), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
}

def PseudoAtomicSwap32 : PseudoAM;

class AtomicPat<Intrinsic intrin, Pseudo AMInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;

let Predicates = [IsLA64] in {
def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i64,
PseudoMaskedAtomicSwap32>;
def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr),
(AMSWAP_DB_W GPR:$incr, GPR:$addr)>;
def : Pat<(atomic_swap_64 GPR:$addr, GPR:$incr),
(AMSWAP_DB_D GPR:$incr, GPR:$addr)>;
} // Predicates = [IsLA64]

let Predicates = [IsLA32] in {
def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i32,
PseudoMaskedAtomicSwap32>;
def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr),
(PseudoAtomicSwap32 GPR:$incr, GPR:$addr)>;
} // Predicates = [IsLA32]

/// Other pseudo-instructions

// Pessimistically assume the stack pointer will be clobbered
8 changes: 8 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -102,6 +102,7 @@ class LoongArchPassConfig : public TargetPassConfig {

void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass2() override;
};
} // end namespace

@@ -121,3 +122,10 @@ bool LoongArchPassConfig::addInstSelector() {

return false;
}

void LoongArchPassConfig::addPreEmitPass2() {
// Schedule the expansion of AtomicPseudos at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LL/SC block.
addPass(createLoongArchExpandAtomicPseudoPass());
}
145 changes: 145 additions & 0 deletions llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -0,0 +1,145 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64

define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-LABEL: atomicrmw_xchg_i8_acquire:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $a2, $zero, -4
; LA32-NEXT: and $a2, $a0, $a2
; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: ori $a3, $zero, 255
; LA32-NEXT: sll.w $a3, $a3, $a0
; LA32-NEXT: andi $a1, $a1, 255
; LA32-NEXT: sll.w $a1, $a1, $a0
; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
; LA32-NEXT: ll.w $a4, $a2, 0
; LA32-NEXT: addi.w $a5, $a1, 0
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
; LA32-NEXT: beq $a5, $zero, .LBB0_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xchg_i8_acquire:
; LA64: # %bb.0:
; LA64-NEXT: addi.w $a2, $zero, -4
; LA64-NEXT: and $a2, $a0, $a2
; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: ori $a3, $zero, 255
; LA64-NEXT: sll.w $a3, $a3, $a0
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: andi $a1, $a1, 255
; LA64-NEXT: sll.w $a1, $a1, $a0
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
; LA64-NEXT: ll.w $a4, $a2, 0
; LA64-NEXT: addi.w $a5, $a1, 0
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
; LA64-NEXT: beq $a5, $zero, .LBB0_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
%1 = atomicrmw xchg ptr %a, i8 %b acquire
ret i8 %1
}

define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-LABEL: atomicrmw_xchg_i16_acquire:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $a2, $zero, -4
; LA32-NEXT: and $a2, $a0, $a2
; LA32-NEXT: slli.w $a0, $a0, 3
; LA32-NEXT: lu12i.w $a3, 15
; LA32-NEXT: ori $a3, $a3, 4095
; LA32-NEXT: sll.w $a3, $a3, $a0
; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
; LA32-NEXT: sll.w $a1, $a1, $a0
; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
; LA32-NEXT: ll.w $a4, $a2, 0
; LA32-NEXT: addi.w $a5, $a1, 0
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
; LA32-NEXT: beq $a5, $zero, .LBB1_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xchg_i16_acquire:
; LA64: # %bb.0:
; LA64-NEXT: addi.w $a2, $zero, -4
; LA64-NEXT: and $a2, $a0, $a2
; LA64-NEXT: slli.d $a0, $a0, 3
; LA64-NEXT: lu12i.w $a3, 15
; LA64-NEXT: ori $a3, $a3, 4095
; LA64-NEXT: sll.w $a3, $a3, $a0
; LA64-NEXT: addi.w $a3, $a3, 0
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
; LA64-NEXT: sll.w $a1, $a1, $a0
; LA64-NEXT: addi.w $a1, $a1, 0
; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
; LA64-NEXT: dbar 0
; LA64-NEXT: ll.w $a4, $a2, 0
; LA64-NEXT: addi.w $a5, $a1, 0
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
; LA64-NEXT: beq $a5, $zero, .LBB1_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
%1 = atomicrmw xchg ptr %a, i16 %b acquire
ret i16 %1
}

define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
; LA32-LABEL: atomicrmw_xchg_i32_acquire:
; LA32: # %bb.0:
; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; LA32-NEXT: dbar 0
; LA32-NEXT: ll.w $a2, $a1, 0
; LA32-NEXT: move $a3, $a0
; LA32-NEXT: sc.w $a3, $a1, 0
; LA32-NEXT: beq $a3, $zero, .LBB2_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xchg_i32_acquire:
; LA64: # %bb.0:
; LA64-NEXT: amswap_db.w $a0, $a1, $a0
; LA64-NEXT: ret
%1 = atomicrmw xchg ptr %a, i32 %b acquire
ret i32 %1
}

define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind {
; LA32-LABEL: atomicrmw_xchg_i64_acquire:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $sp, $sp, -16
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT: ori $a3, $zero, 2
; LA32-NEXT: bl %plt(__atomic_exchange_8)
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
;
; LA64-LABEL: atomicrmw_xchg_i64_acquire:
; LA64: # %bb.0:
; LA64-NEXT: amswap_db.d $a0, $a1, $a0
; LA64-NEXT: ret
%1 = atomicrmw xchg ptr %a, i64 %b acquire
ret i64 %1
}