452 changes: 452 additions & 0 deletions llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp

Large diffs are not rendered by default.

93 changes: 91 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);

if (Subtarget.hasStdExtA())
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
else
setMinCmpXchgSizeInBits(32);
} else {
setMaxAtomicSizeInBitsSupported(0);
}

setBooleanContents(ZeroOrOneBooleanContent);

Expand All @@ -160,6 +162,33 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
default:
return false;
case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
case Intrinsic::riscv_masked_atomicrmw_add_i32:
case Intrinsic::riscv_masked_atomicrmw_sub_i32:
case Intrinsic::riscv_masked_atomicrmw_nand_i32:
case Intrinsic::riscv_masked_atomicrmw_max_i32:
case Intrinsic::riscv_masked_atomicrmw_min_i32:
case Intrinsic::riscv_masked_atomicrmw_umax_i32:
case Intrinsic::riscv_masked_atomicrmw_umin_i32:
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = 4;
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
Expand Down Expand Up @@ -1596,3 +1625,63 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
return Builder.CreateFence(AtomicOrdering::Acquire);
return nullptr;
}

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
case AtomicRMWInst::Add:
return Intrinsic::riscv_masked_atomicrmw_add_i32;
case AtomicRMWInst::Sub:
return Intrinsic::riscv_masked_atomicrmw_sub_i32;
case AtomicRMWInst::Nand:
return Intrinsic::riscv_masked_atomicrmw_nand_i32;
case AtomicRMWInst::Max:
return Intrinsic::riscv_masked_atomicrmw_max_i32;
case AtomicRMWInst::Min:
return Intrinsic::riscv_masked_atomicrmw_min_i32;
case AtomicRMWInst::UMax:
return Intrinsic::riscv_masked_atomicrmw_umax_i32;
case AtomicRMWInst::UMin:
return Intrinsic::riscv_masked_atomicrmw_umin_i32;
}
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
Type *Tys[] = {AlignedAddr->getType()};
Function *LrwOpScwLoop = Intrinsic::getDeclaration(
AI->getModule(),
getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);

// Must pass the shift amount needed to sign extend the loaded value prior
// to performing a signed comparison for min/max. ShiftAmt is the number of
// bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
// is the number of bits to left+right shift the value in order to
// sign-extend.
if (AI->getOperation() == AtomicRMWInst::Min ||
AI->getOperation() == AtomicRMWInst::Max) {
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned ValWidth =
DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
Value *SextShamt = Builder.CreateSub(
Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
return Builder.CreateCall(LrwOpScwLoop,
{AlignedAddr, Incr, Mask, SextShamt, Ordering});
}

return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
}
9 changes: 9 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class RISCVTargetLowering : public TargetLowering {
explicit RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI);

bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
Expand Down Expand Up @@ -115,6 +118,12 @@ class RISCVTargetLowering : public TargetLowering {
bool IsEligibleForTailCallOptimization(CCState &CCInfo,
CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;

TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
virtual Value *emitMaskedAtomicRMWIntrinsic(
IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override;
};
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ def : MnemonicAlias<"sbreak", "ebreak">;

/// Generic pattern classes

class PatGprGpr<SDPatternOperator OpNode, RVInstR Inst>
class PatGprGpr<SDPatternOperator OpNode, RVInst Inst>
: Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
class PatGprSimm12<SDPatternOperator OpNode, RVInstI Inst>
: Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>;
Expand Down
125 changes: 125 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoA.td
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,129 @@ defm : LdPat<atomic_load_32, LW>;
defm : AtomicStPat<atomic_store_8, SB, GPR>;
defm : AtomicStPat<atomic_store_16, SH, GPR>;
defm : AtomicStPat<atomic_store_32, SW, GPR>;

/// AMOs

multiclass AMOPat<string AtomicOp, string BaseInst> {
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
!cast<RVInst>(BaseInst)>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
!cast<RVInst>(BaseInst#"_AQ")>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
!cast<RVInst>(BaseInst#"_RL")>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
!cast<RVInst>(BaseInst#"_AQ_RL")>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
!cast<RVInst>(BaseInst#"_AQ_RL")>;
}

defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
defm : AMOPat<"atomic_load_add_32", "AMOADD_W">;
defm : AMOPat<"atomic_load_and_32", "AMOAND_W">;
defm : AMOPat<"atomic_load_or_32", "AMOOR_W">;
defm : AMOPat<"atomic_load_xor_32", "AMOXOR_W">;
defm : AMOPat<"atomic_load_max_32", "AMOMAX_W">;
defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;

def : Pat<(atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr),
(AMOADD_W GPR:$addr, (SUB X0, GPR:$incr))>;
def : Pat<(atomic_load_sub_32_acquire GPR:$addr, GPR:$incr),
(AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
def : Pat<(atomic_load_sub_32_release GPR:$addr, GPR:$incr),
(AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>;
def : Pat<(atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr),
(AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr),
(AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;

/// Pseudo AMOs

class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$incr, i32imm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
}

def PseudoAtomicLoadNand32 : PseudoAMO;
// Ordering constants must be kept in sync with the AtomicOrdering enum in
// AtomicOrdering.h.
def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
def : Pat<(atomic_load_nand_32_acquire GPR:$addr, GPR:$incr),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
def : Pat<(atomic_load_nand_32_release GPR:$addr, GPR:$incr),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
def : Pat<(atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;

class PseudoMaskedAMO
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
}

class PseudoMaskedAMOMinMax
: Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
(ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$sextshamt,
i32imm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
"@earlyclobber $scratch2";
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
}

class PseudoMaskedAMOUMinUMax
: Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
(ins GPR:$addr, GPR:$incr, GPR:$mask, i32imm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
"@earlyclobber $scratch2";
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 0;
}

class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;

class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
imm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
imm:$ordering)>;

def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
PseudoMaskedAtomicSwap32>;
def PseudoMaskedAtomicLoadAdd32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i32,
PseudoMaskedAtomicLoadAdd32>;
def PseudoMaskedAtomicLoadSub32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i32,
PseudoMaskedAtomicLoadSub32>;
def PseudoMaskedAtomicLoadNand32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i32,
PseudoMaskedAtomicLoadNand32>;
def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMOMinMax;
def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i32,
PseudoMaskedAtomicLoadMax32>;
def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMOMinMax;
def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i32,
PseudoMaskedAtomicLoadMin32>;
def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMOUMinUMax;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i32,
PseudoMaskedAtomicLoadUMax32>;
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
PseudoMaskedAtomicLoadUMin32>;
} // Predicates = [HasStdExtA]
10 changes: 10 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ using namespace llvm;
extern "C" void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto PR = PassRegistry::getPassRegistry();
initializeRISCVExpandPseudoPass(*PR);
}

static std::string computeDataLayout(const Triple &TT) {
Expand Down Expand Up @@ -78,6 +80,7 @@ class RISCVPassConfig : public TargetPassConfig {
void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
void addPreEmitPass2() override;
void addPreRegAlloc() override;
};
}
Expand All @@ -99,6 +102,13 @@ bool RISCVPassConfig::addInstSelector() {

void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }

void RISCVPassConfig::addPreEmitPass2() {
// Schedule the expansion of AMOs at the last possible moment, avoiding the
// possibility for other passes to break the requirements for forward
// progress in the LR/SC block.
addPass(createRISCVExpandPseudoPass());
}

void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVMergeBaseOffsetOptPass());
}
4,282 changes: 4,282 additions & 0 deletions llvm/test/CodeGen/RISCV/atomic-rmw.ll

Large diffs are not rendered by default.