diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index c89212dae72d9..90a4723c9a3ed 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -756,6 +756,155 @@ LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
   return ArrayRef(TargetFlags);
 }
 
+bool LoongArchInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
+                                             Register Reg,
+                                             const MachineInstr &AddrI,
+                                             ExtAddrMode &AM) const {
+  enum MemIOffsetType {
+    Imm14Shift2,
+    Imm12,
+    Imm11Shift1,
+    Imm10Shift2,
+    Imm9Shift3,
+    Imm8,
+    Imm8Shift1,
+    Imm8Shift2,
+    Imm8Shift3
+  };
+
+  MemIOffsetType OT;
+  switch (MemI.getOpcode()) {
+  default:
+    return false;
+  case LoongArch::LDPTR_W:
+  case LoongArch::LDPTR_D:
+  case LoongArch::STPTR_W:
+  case LoongArch::STPTR_D:
+    OT = Imm14Shift2;
+    break;
+  case LoongArch::LD_B:
+  case LoongArch::LD_H:
+  case LoongArch::LD_W:
+  case LoongArch::LD_D:
+  case LoongArch::LD_BU:
+  case LoongArch::LD_HU:
+  case LoongArch::LD_WU:
+  case LoongArch::ST_B:
+  case LoongArch::ST_H:
+  case LoongArch::ST_W:
+  case LoongArch::ST_D:
+  case LoongArch::FLD_S:
+  case LoongArch::FLD_D:
+  case LoongArch::FST_S:
+  case LoongArch::FST_D:
+  case LoongArch::VLD:
+  case LoongArch::VST:
+  case LoongArch::XVLD:
+  case LoongArch::XVST:
+  case LoongArch::VLDREPL_B:
+  case LoongArch::XVLDREPL_B:
+    OT = Imm12;
+    break;
+  case LoongArch::VLDREPL_H:
+  case LoongArch::XVLDREPL_H:
+    OT = Imm11Shift1;
+    break;
+  case LoongArch::VLDREPL_W:
+  case LoongArch::XVLDREPL_W:
+    OT = Imm10Shift2;
+    break;
+  case LoongArch::VLDREPL_D:
+  case LoongArch::XVLDREPL_D:
+    OT = Imm9Shift3;
+    break;
+  case LoongArch::VSTELM_B:
+  case LoongArch::XVSTELM_B:
+    OT = Imm8;
+    break;
+  case LoongArch::VSTELM_H:
+  case LoongArch::XVSTELM_H:
+    OT = Imm8Shift1;
+    break;
+  case LoongArch::VSTELM_W:
+  case LoongArch::XVSTELM_W:
+    OT = Imm8Shift2;
+    break;
+  case LoongArch::VSTELM_D:
+  case LoongArch::XVSTELM_D:
+    OT = Imm8Shift3;
+    break;
+  }
+
+  if (MemI.getOperand(0).getReg() == Reg)
+    return false;
+
+  if ((AddrI.getOpcode() != LoongArch::ADDI_W &&
+       AddrI.getOpcode() != LoongArch::ADDI_D) ||
+      !AddrI.getOperand(1).isReg() || !AddrI.getOperand(2).isImm())
+    return false;
+
+  int64_t OldOffset = MemI.getOperand(2).getImm();
+  int64_t Disp = AddrI.getOperand(2).getImm();
+  int64_t NewOffset = OldOffset + Disp;
+  if (!STI.is64Bit())
+    NewOffset = SignExtend64<32>(NewOffset);
+
+  if (!(OT == Imm14Shift2 && isShiftedInt<14, 2>(NewOffset) && STI.hasUAL()) &&
+      !(OT == Imm12 && isInt<12>(NewOffset)) &&
+      !(OT == Imm11Shift1 && isShiftedInt<11, 1>(NewOffset)) &&
+      !(OT == Imm10Shift2 && isShiftedInt<10, 2>(NewOffset)) &&
+      !(OT == Imm9Shift3 && isShiftedInt<9, 3>(NewOffset)) &&
+      !(OT == Imm8 && isInt<8>(NewOffset)) &&
+      !(OT == Imm8Shift1 && isShiftedInt<8, 1>(NewOffset)) &&
+      !(OT == Imm8Shift2 && isShiftedInt<8, 2>(NewOffset)) &&
+      !(OT == Imm8Shift3 && isShiftedInt<8, 3>(NewOffset)))
+    return false;
+
+  AM.BaseReg = AddrI.getOperand(1).getReg();
+  AM.ScaledReg = 0;
+  AM.Scale = 0;
+  AM.Displacement = NewOffset;
+  AM.Form = ExtAddrMode::Formula::Basic;
+  return true;
+}
+
+MachineInstr *
+LoongArchInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
+                                     const ExtAddrMode &AM) const {
+  const DebugLoc &DL = MemI.getDebugLoc();
+  MachineBasicBlock &MBB = *MemI.getParent();
+
+  assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
+         "Addressing mode not supported for folding");
+
+  unsigned MemIOp = MemI.getOpcode();
+  switch (MemIOp) {
+  default:
+    return BuildMI(MBB, MemI, DL, get(MemIOp))
+        .addReg(MemI.getOperand(0).getReg(),
+                MemI.mayLoad() ? RegState::Define : 0)
+        .addReg(AM.BaseReg)
+        .addImm(AM.Displacement)
+        .setMemRefs(MemI.memoperands())
+        .setMIFlags(MemI.getFlags());
+  case LoongArch::VSTELM_B:
+  case LoongArch::VSTELM_H:
+  case LoongArch::VSTELM_W:
+  case LoongArch::VSTELM_D:
+  case LoongArch::XVSTELM_B:
+  case LoongArch::XVSTELM_H:
+  case LoongArch::XVSTELM_W:
+  case LoongArch::XVSTELM_D:
+    return BuildMI(MBB, MemI, DL, get(MemIOp))
+        .addReg(MemI.getOperand(0).getReg(), 0)
+        .addReg(AM.BaseReg)
+        .addImm(AM.Displacement)
+        .addImm(MemI.getOperand(3).getImm())
+        .setMemRefs(MemI.memoperands())
+        .setMIFlags(MemI.getFlags());
+  }
+}
+
 // Returns true if this is the sext.w pattern, addi.w rd, rs, 0.
 bool LoongArch::isSEXT_W(const MachineInstr &MI) {
   return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index f25958a32bec4..f69a558bdeca9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -93,6 +93,12 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableBitmaskMachineOperandTargetFlags() const override;
 
+  bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
+                           const MachineInstr &AddrI,
+                           ExtAddrMode &AM) const override;
+  MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
+                                 const ExtAddrMode &AM) const override;
+
 protected:
   const LoongArchSubtarget &STI;
 };
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 9de4c9d83792b..92a9388e5cb7b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -62,6 +62,11 @@ static cl::opt<bool>
                       cl::desc("Enable the merge base offset pass"), cl::init(true),
                       cl::Hidden);
 
+static cl::opt<bool>
+    EnableSinkFold("loongarch-enable-sink-fold",
+                   cl::desc("Enable sinking and folding of instruction copies"),
+                   cl::init(true), cl::Hidden);
+
 static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
   return RM.value_or(Reloc::Static);
 }
@@ -146,7 +151,9 @@ namespace {
 class LoongArchPassConfig : public TargetPassConfig {
 public:
   LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
-      : TargetPassConfig(TM, PM) {}
+      : TargetPassConfig(TM, PM) {
+    setEnableSinkAndFold(EnableSinkFold);
+  }
 
   LoongArchTargetMachine &getLoongArchTargetMachine() const {
     return getTM<LoongArchTargetMachine>();
diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll
index c3656a6bdafba..9bafa10c47e3f 100644
--- a/llvm/test/CodeGen/LoongArch/ldptr.ll
+++ b/llvm/test/CodeGen/LoongArch/ldptr.ll
@@ -24,8 +24,7 @@ define signext i32 @ldptr_w(ptr %p) nounwind {
 ; LA32-LABEL: ldptr_w:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    addi.w $a0, $a0, 2047
-; LA32-NEXT:    addi.w $a0, $a0, 1
-; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: ldptr_w:
@@ -81,10 +80,9 @@ entry:
 define i64 @ldptr_d(ptr %p) nounwind {
 ; LA32-LABEL: ldptr_d:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    addi.w $a0, $a0, 2047
-; LA32-NEXT:    addi.w $a1, $a0, 1
-; LA32-NEXT:    ld.w $a0, $a1, 0
-; LA32-NEXT:    ld.w $a1, $a1, 4
+; LA32-NEXT:    addi.w $a1, $a0, 2047
+; LA32-NEXT:    ld.w $a0, $a1, 1
+; LA32-NEXT:    ld.w $a1, $a1, 5
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: ldptr_d:
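Note on the ldptr.ll/stptr.ll changes above: the fold is only legal when the combined displacement still fits the memory instruction's immediate field, which is exactly what the isInt/isShiftedInt chain in canFoldIntoAddrMode checks (si12 for ld.w/st.w, an si14 immediate scaled by 4 for ldptr.w/stptr.w, and so on). Below is a minimal standalone sketch of that arithmetic using the numbers from the ldptr_w test; the helper names are ours and only mirror the semantics of llvm::isInt / llvm::isShiftedInt, they are not the LLVM implementations.

// Standalone sketch, not LLVM code: the range checks canFoldIntoAddrMode
// applies to NewOffset = OldOffset + Disp before allowing the fold.
#include <cstdint>
#include <cstdio>

// True if V fits an N-bit signed immediate (mirrors llvm::isInt<N>).
template <unsigned N> bool fitsSImm(int64_t V) {
  return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
}

// True if V is 2^S-aligned and V >> S fits an N-bit signed immediate
// (mirrors llvm::isShiftedInt<N, S>), e.g. the si14<<2 form of ldptr.w.
template <unsigned N, unsigned S> bool fitsShiftedSImm(int64_t V) {
  return (V & ((int64_t(1) << S) - 1)) == 0 && fitsSImm<N>(V >> S);
}

int main() {
  // ldptr_w loads at offset 2048 from %p. On LA32 that address was built as
  // addi.w +2047 plus addi.w +1 in front of a load with immediate 0; folding
  // the second addi.w into the load gives NewOffset = 0 + 1 = 1.
  printf("fold +1 into ld.w : %d\n", fitsSImm<12>(0 + 1));          // 1
  // The full 2048 would not fit an si12 field ...
  printf("2048 fits si12    : %d\n", fitsSImm<12>(2048));           // 0
  // ... but it does fit the scaled si14<<2 field of ldptr.w/stptr.w.
  printf("2048 fits si14<<2 : %d\n", fitsShiftedSImm<14, 2>(2048)); // 1
  return 0;
}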
diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
index 9a806a12f7de6..93f73e5cd30ff 100644
--- a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
+++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
@@ -25,14 +25,13 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    move $s1, $a2
 ; LA32-NEXT:    slli.w $a1, $a0, 4
 ; LA32-NEXT:    alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT:    add.w $a0, $a4, $a0
 ; LA32-NEXT:    sltui $a1, $a3, 1
 ; LA32-NEXT:    slti $a2, $a3, 0
 ; LA32-NEXT:    masknez $a2, $a2, $a1
 ; LA32-NEXT:    sltui $a3, $s1, 1
 ; LA32-NEXT:    maskeqz $a1, $a3, $a1
 ; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    addi.w $s2, $a0, 8
+; LA32-NEXT:    add.w $s2, $a4, $a0
 ; LA32-NEXT:    bnez $a1, .LBB0_3
 ; LA32-NEXT:  # %bb.1: # %for.body.preheader
 ; LA32-NEXT:    move $fp, $a4
@@ -45,8 +44,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl f
-; LA32-NEXT:    ld.w $a0, $s2, 4
-; LA32-NEXT:    ld.w $a1, $s2, 0
+; LA32-NEXT:    ld.w $a0, $s2, 12
+; LA32-NEXT:    ld.w $a1, $s2, 8
 ; LA32-NEXT:    add.w $a0, $a0, $s6
 ; LA32-NEXT:    add.w $s3, $a1, $s3
 ; LA32-NEXT:    sltu $a1, $s3, $a1
@@ -63,8 +62,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    move $s3, $zero
 ; LA32-NEXT:    move $s6, $zero
 ; LA32-NEXT:  .LBB0_4: # %for.cond.cleanup
-; LA32-NEXT:    st.w $s3, $s2, 0
-; LA32-NEXT:    st.w $s6, $s2, 4
+; LA32-NEXT:    st.w $s3, $s2, 8
+; LA32-NEXT:    st.w $s6, $s2, 12
 ; LA32-NEXT:    ld.w $s6, $sp, 12 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s5, $sp, 16 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s4, $sp, 20 # 4-byte Folded Reload
@@ -88,8 +87,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $s0, $a1
 ; LA64-NEXT:    slli.d $a1, $a0, 4
 ; LA64-NEXT:    alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    addi.d $s1, $a0, 8
+; LA64-NEXT:    add.d $s1, $a2, $a0
 ; LA64-NEXT:    blez $s0, .LBB0_3
 ; LA64-NEXT:  # %bb.1: # %for.body.preheader
 ; LA64-NEXT:    move $fp, $a2
@@ -100,7 +98,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(f)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    ld.d $a0, $s1, 0
+; LA64-NEXT:    ld.d $a0, $s1, 8
 ; LA64-NEXT:    addi.d $s0, $s0, -1
 ; LA64-NEXT:    add.d $s2, $a0, $s2
 ; LA64-NEXT:    bnez $s0, .LBB0_2
@@ -108,7 +106,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:  .LBB0_3:
 ; LA64-NEXT:    move $s2, $zero
 ; LA64-NEXT:  .LBB0_4: # %for.cond.cleanup
-; LA64-NEXT:    st.d $s2, $s1, 0
+; LA64-NEXT:    st.d $s2, $s1, 8
 ; LA64-NEXT:    ld.d $s2, $sp, 8 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
@@ -153,14 +151,13 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    move $s1, $a2
 ; LA32-NEXT:    slli.w $a1, $a0, 4
 ; LA32-NEXT:    alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT:    add.w $a0, $a4, $a0
 ; LA32-NEXT:    sltui $a1, $a3, 1
 ; LA32-NEXT:    slti $a2, $a3, 0
 ; LA32-NEXT:    masknez $a2, $a2, $a1
 ; LA32-NEXT:    sltui $a3, $s1, 1
 ; LA32-NEXT:    maskeqz $a1, $a3, $a1
 ; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    addi.w $s2, $a0, 16
+; LA32-NEXT:    add.w $s2, $a4, $a0
 ; LA32-NEXT:    bnez $a1, .LBB1_3
 ; LA32-NEXT:  # %bb.1: # %for.body.preheader
 ; LA32-NEXT:    move $fp, $a4
@@ -172,7 +169,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    # =>This Inner Loop Header: Depth=1
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl f
-; LA32-NEXT:    fld.s $fa0, $s2, 0
+; LA32-NEXT:    fld.s $fa0, $s2, 16
 ; LA32-NEXT:    addi.w $s3, $s3, 1
 ; LA32-NEXT:    sltui $a0, $s3, 1
 ; LA32-NEXT:    add.w $s4, $s4, $a0
@@ -185,7 +182,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:  .LBB1_3:
 ; LA32-NEXT:    movgr2fr.w $fs0, $zero
 ; LA32-NEXT:  .LBB1_4: # %for.cond.cleanup
-; LA32-NEXT:    fst.s $fs0, $s2, 0
+; LA32-NEXT:    fst.s $fs0, $s2, 16
 ; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
 ; LA32-NEXT:    ld.w $s4, $sp, 20 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s3, $sp, 24 # 4-byte Folded Reload
@@ -208,8 +205,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $s0, $a1
 ; LA64-NEXT:    slli.d $a1, $a0, 4
 ; LA64-NEXT:    alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    addi.d $s1, $a0, 16
+; LA64-NEXT:    add.d $s1, $a2, $a0
 ; LA64-NEXT:    blez $s0, .LBB1_3
 ; LA64-NEXT:  # %bb.1: # %for.body.preheader
 ; LA64-NEXT:    move $fp, $a2
@@ -220,7 +216,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(f)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    fld.s $fa0, $s1, 0
+; LA64-NEXT:    fld.s $fa0, $s1, 16
 ; LA64-NEXT:    addi.d $s0, $s0, -1
 ; LA64-NEXT:    fadd.s $fs0, $fa0, $fs0
 ; LA64-NEXT:    bnez $s0, .LBB1_2
@@ -228,7 +224,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:  .LBB1_3:
 ; LA64-NEXT:    movgr2fr.w $fs0, $zero
 ; LA64-NEXT:  .LBB1_4: # %for.cond.cleanup
-; LA64-NEXT:    fst.s $fs0, $s1, 0
+; LA64-NEXT:    fst.s $fs0, $s1, 16
 ; LA64-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
@@ -271,14 +267,13 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    move $s0, $a3
 ; LA32-NEXT:    move $s1, $a2
 ; LA32-NEXT:    slli.w $a0, $a0, 6
-; LA32-NEXT:    add.w $a0, $a4, $a0
 ; LA32-NEXT:    sltui $a1, $a3, 1
 ; LA32-NEXT:    slti $a2, $a3, 0
 ; LA32-NEXT:    masknez $a2, $a2, $a1
 ; LA32-NEXT:    sltui $a3, $s1, 1
 ; LA32-NEXT:    maskeqz $a1, $a3, $a1
 ; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    addi.w $s2, $a0, 16
+; LA32-NEXT:    add.w $s2, $a4, $a0
 ; LA32-NEXT:    bnez $a1, .LBB2_3
 ; LA32-NEXT:  # %bb.1: # %for.body.preheader
 ; LA32-NEXT:    move $fp, $a4
@@ -291,7 +286,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl f
-; LA32-NEXT:    vld $vr0, $s2, 0
+; LA32-NEXT:    vld $vr0, $s2, 16
 ; LA32-NEXT:    addi.w $s3, $s3, 1
 ; LA32-NEXT:    sltui $a0, $s3, 1
 ; LA32-NEXT:    add.w $s4, $s4, $a0
@@ -307,7 +302,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:  .LBB2_3:
 ; LA32-NEXT:    vrepli.b $vr0, 0
 ; LA32-NEXT:  .LBB2_4: # %for.cond.cleanup
-; LA32-NEXT:    vst $vr0, $s2, 0
+; LA32-NEXT:    vst $vr0, $s2, 16
 ; LA32-NEXT:    ld.w $s4, $sp, 20 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s3, $sp, 24 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s2, $sp, 28 # 4-byte Folded Reload
@@ -326,8 +321,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    st.d $s0, $sp, 24 # 8-byte Folded Spill
 ; LA64-NEXT:    st.d $s1, $sp, 16 # 8-byte Folded Spill
 ; LA64-NEXT:    slli.d $a0, $a0, 6
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    addi.d $s1, $a0, 16
+; LA64-NEXT:    add.d $s1, $a2, $a0
 ; LA64-NEXT:    blez $a1, .LBB2_3
 ; LA64-NEXT:  # %bb.1: # %for.body.preheader
 ; LA64-NEXT:    move $fp, $a2
@@ -340,7 +334,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(f)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    vld $vr0, $s1, 0
+; LA64-NEXT:    vld $vr0, $s1, 16
 ; LA64-NEXT:    addi.d $s0, $s0, -1
 ; LA64-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload
 ; LA64-NEXT:    vadd.w $vr1, $vr0, $vr1
@@ -351,7 +345,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:  .LBB2_3:
 ; LA64-NEXT:    vrepli.b $vr0, 0
 ; LA64-NEXT:  .LBB2_4: # %for.cond.cleanup
-; LA64-NEXT:    vst $vr0, $s1, 0
+; LA64-NEXT:    vst $vr0, $s1, 16
 ; LA64-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -393,14 +387,13 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    move $s0, $a3
 ; LA32-NEXT:    move $s1, $a2
 ; LA32-NEXT:    slli.w $a0, $a0, 6
-; LA32-NEXT:    add.w $a0, $a4, $a0
 ; LA32-NEXT:    sltui $a1, $a3, 1
 ; LA32-NEXT:    slti $a2, $a3, 0
 ; LA32-NEXT:    masknez $a2, $a2, $a1
 ; LA32-NEXT:    sltui $a3, $s1, 1
 ; LA32-NEXT:    maskeqz $a1, $a3, $a1
 ; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    addi.w $s2, $a0, 32
+; LA32-NEXT:    add.w $s2, $a4, $a0
 ; LA32-NEXT:    bnez $a1, .LBB3_3
 ; LA32-NEXT:  # %bb.1: # %for.body.preheader
 ; LA32-NEXT:    move $fp, $a4
@@ -413,7 +406,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl f
-; LA32-NEXT:    xvld $xr0, $s2, 0
+; LA32-NEXT:    xvld $xr0, $s2, 32
 ; LA32-NEXT:    addi.w $s3, $s3, 1
 ; LA32-NEXT:    sltui $a0, $s3, 1
 ; LA32-NEXT:    add.w $s4, $s4, $a0
@@ -429,7 +422,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:  .LBB3_3:
 ; LA32-NEXT:    xvrepli.b $xr0, 0
 ; LA32-NEXT:  .LBB3_4: # %for.cond.cleanup
-; LA32-NEXT:    xvst $xr0, $s2, 0
+; LA32-NEXT:    xvst $xr0, $s2, 32
 ; LA32-NEXT:    ld.w $s4, $sp, 52 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s3, $sp, 56 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s2, $sp, 60 # 4-byte Folded Reload
@@ -448,8 +441,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    st.d $s0, $sp, 56 # 8-byte Folded Spill
 ; LA64-NEXT:    st.d $s1, $sp, 48 # 8-byte Folded Spill
 ; LA64-NEXT:    slli.d $a0, $a0, 6
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    addi.d $s1, $a0, 32
+; LA64-NEXT:    add.d $s1, $a2, $a0
 ; LA64-NEXT:    blez $a1, .LBB3_3
 ; LA64-NEXT:  # %bb.1: # %for.body.preheader
 ; LA64-NEXT:    move $fp, $a2
@@ -462,7 +454,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(f)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    xvld $xr0, $s1, 0
+; LA64-NEXT:    xvld $xr0, $s1, 32
 ; LA64-NEXT:    addi.d $s0, $s0, -1
 ; LA64-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload
 ; LA64-NEXT:    xvadd.h $xr1, $xr0, $xr1
@@ -473,7 +465,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:  .LBB3_3:
 ; LA64-NEXT:    xvrepli.b $xr0, 0
 ; LA64-NEXT:  .LBB3_4: # %for.cond.cleanup
-; LA64-NEXT:    xvst $xr0, $s1, 0
+; LA64-NEXT:    xvst $xr0, $s1, 32
 ; LA64-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
@@ -516,14 +508,13 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    move $s1, $a2
 ; LA32-NEXT:    slli.w $a1, $a0, 4
 ; LA32-NEXT:    alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT:    add.w $a0, $a4, $a0
 ; LA32-NEXT:    sltui $a1, $a3, 1
 ; LA32-NEXT:    slti $a2, $a3, 0
 ; LA32-NEXT:    masknez $a2, $a2, $a1
 ; LA32-NEXT:    sltui $a3, $s1, 1
 ; LA32-NEXT:    maskeqz $a1, $a3, $a1
 ; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    addi.w $s2, $a0, 16
+; LA32-NEXT:    add.w $s2, $a4, $a0
 ; LA32-NEXT:    bnez $a1, .LBB4_3
 ; LA32-NEXT:  # %bb.1: # %for.body.preheader
 ; LA32-NEXT:    move $fp, $a4
@@ -536,7 +527,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    vst $vr0, $sp, 0 # 16-byte Folded Spill
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl f
-; LA32-NEXT:    vldrepl.b $vr0, $s2, 0
+; LA32-NEXT:    vldrepl.b $vr0, $s2, 16
 ; LA32-NEXT:    addi.w $s3, $s3, 1
 ; LA32-NEXT:    sltui $a0, $s3, 1
 ; LA32-NEXT:    add.w $s4, $s4, $a0
@@ -552,7 +543,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:  .LBB4_3:
 ; LA32-NEXT:    vrepli.b $vr0, 0
 ; LA32-NEXT:  .LBB4_4: # %for.cond.cleanup
-; LA32-NEXT:    vstelm.b $vr0, $s2, 0, 1
+; LA32-NEXT:    vstelm.b $vr0, $s2, 16, 1
 ; LA32-NEXT:    ld.w $s4, $sp, 20 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s3, $sp, 24 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s2, $sp, 28 # 4-byte Folded Reload
@@ -573,8 +564,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $s0, $a1
 ; LA64-NEXT:    slli.d $a1, $a0, 4
 ; LA64-NEXT:    alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    addi.d $s1, $a0, 16
+; LA64-NEXT:    add.d $s1, $a2, $a0
 ; LA64-NEXT:    blez $s0, .LBB4_3
 ; LA64-NEXT:  # %bb.1: # %for.body.preheader
 ; LA64-NEXT:    move $fp, $a2
@@ -586,7 +576,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(f)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    vldrepl.b $vr0, $s1, 0
+; LA64-NEXT:    vldrepl.b $vr0, $s1, 16
 ; LA64-NEXT:    addi.d $s0, $s0, -1
 ; LA64-NEXT:    vld $vr1, $sp, 0 # 16-byte Folded Reload
 ; LA64-NEXT:    vadd.b $vr1, $vr0, $vr1
@@ -597,7 +587,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:  .LBB4_3:
 ; LA64-NEXT:    vrepli.b $vr0, 0
 ; LA64-NEXT:  .LBB4_4: # %for.cond.cleanup
-; LA64-NEXT:    vstelm.b $vr0, $s1, 0, 1
+; LA64-NEXT:    vstelm.b $vr0, $s1, 16, 1
 ; LA64-NEXT:    ld.d $s1, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 24 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -643,14 +633,13 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    move $s1, $a2
 ; LA32-NEXT:    slli.w $a1, $a0, 4
 ; LA32-NEXT:    alsl.w $a0, $a0, $a1, 3
-; LA32-NEXT:    add.w $a0, $a4, $a0
 ; LA32-NEXT:    sltui $a1, $a3, 1
 ; LA32-NEXT:    slti $a2, $a3, 0
 ; LA32-NEXT:    masknez $a2, $a2, $a1
 ; LA32-NEXT:    sltui $a3, $s1, 1
 ; LA32-NEXT:    maskeqz $a1, $a3, $a1
 ; LA32-NEXT:    or $a1, $a1, $a2
-; LA32-NEXT:    addi.w $s2, $a0, 8
+; LA32-NEXT:    add.w $s2, $a4, $a0
 ; LA32-NEXT:    bnez $a1, .LBB5_3
 ; LA32-NEXT:  # %bb.1: # %for.body.preheader
 ; LA32-NEXT:    move $fp, $a4
@@ -663,7 +652,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:    xvst $xr0, $sp, 16 # 32-byte Folded Spill
 ; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl f
-; LA32-NEXT:    xvldrepl.d $xr0, $s2, 0
+; LA32-NEXT:    xvldrepl.d $xr0, $s2, 8
 ; LA32-NEXT:    addi.w $s3, $s3, 1
 ; LA32-NEXT:    sltui $a0, $s3, 1
 ; LA32-NEXT:    add.w $s4, $s4, $a0
@@ -679,7 +668,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA32-NEXT:  .LBB5_3:
 ; LA32-NEXT:    xvrepli.b $xr0, 0
 ; LA32-NEXT:  .LBB5_4: # %for.cond.cleanup
-; LA32-NEXT:    xvstelm.d $xr0, $s2, 0, 1
+; LA32-NEXT:    xvstelm.d $xr0, $s2, 8, 1
 ; LA32-NEXT:    ld.w $s4, $sp, 52 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s3, $sp, 56 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $s2, $sp, 60 # 4-byte Folded Reload
@@ -700,8 +689,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $s0, $a1
 ; LA64-NEXT:    slli.d $a1, $a0, 4
 ; LA64-NEXT:    alsl.d $a0, $a0, $a1, 3
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    addi.d $s1, $a0, 8
+; LA64-NEXT:    add.d $s1, $a2, $a0
 ; LA64-NEXT:    blez $s0, .LBB5_3
 ; LA64-NEXT:  # %bb.1: # %for.body.preheader
 ; LA64-NEXT:    move $fp, $a2
@@ -713,7 +701,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    pcaddu18i $ra, %call36(f)
 ; LA64-NEXT:    jirl $ra, $ra, 0
-; LA64-NEXT:    xvldrepl.d $xr0, $s1, 0
+; LA64-NEXT:    xvldrepl.d $xr0, $s1, 8
 ; LA64-NEXT:    addi.d $s0, $s0, -1
 ; LA64-NEXT:    xvld $xr1, $sp, 16 # 32-byte Folded Reload
 ; LA64-NEXT:    xvfadd.d $xr1, $xr0, $xr1
@@ -724,7 +712,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
 ; LA64-NEXT:  .LBB5_3:
 ; LA64-NEXT:    xvrepli.b $xr0, 0
 ; LA64-NEXT:  .LBB5_4: # %for.cond.cleanup
-; LA64-NEXT:    xvstelm.d $xr0, $s1, 0, 1
+; LA64-NEXT:    xvstelm.d $xr0, $s1, 8, 1
 ; LA64-NEXT:    ld.d $s1, $sp, 48 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 56 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $fp, $sp, 64 # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll
index d70f9f4ba1603..23b433aa15856 100644
--- a/llvm/test/CodeGen/LoongArch/stptr.ll
+++ b/llvm/test/CodeGen/LoongArch/stptr.ll
@@ -23,8 +23,7 @@ define void @stptr_w(ptr %p, i32 signext %val) nounwind {
 ; LA32-LABEL: stptr_w:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    addi.w $a0, $a0, 2047
-; LA32-NEXT:    addi.w $a0, $a0, 1
-; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    st.w $a1, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: stptr_w:
@@ -77,9 +76,8 @@ define void @stptr_d(ptr %p, i64 %val) nounwind {
 ; LA32-LABEL: stptr_d:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    addi.w $a0, $a0, 2047
-; LA32-NEXT:    addi.w $a0, $a0, 1
-; LA32-NEXT:    st.w $a2, $a0, 4
-; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    st.w $a2, $a0, 5
+; LA32-NEXT:    st.w $a1, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: stptr_d:
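For context on the sink-fold-addi.ll checks, here is a rough source-level analogue of the first test (sink_fold_i64). The real test is LLVM IR; the struct layout and names below are assumptions inferred from the 24-byte element stride (slli 4 plus alsl 3, i.e. 16*k + 8*k) and the +8 field offset that the pass now folds into the ld.d/st.d immediates instead of keeping a separate addi.d in front of the loop.

// Rough analogue only, not the test itself. f() is an opaque call that may
// write memory, which is why the field is reloaded on every iteration.
#include <cstdint>

struct Elem {
  int64_t pad;   // bytes 0..7
  int64_t sum;   // bytes 8..15, the field the loop accumulates into
  int64_t more;  // bytes 16..23, total element size 24
};

extern void f(Elem *);

void sink_fold_i64(int64_t k, int64_t n, Elem *a) {
  int64_t s = 0;
  for (int64_t i = 0; i < n; ++i) {
    f(a);          // clobbers memory, so a[k].sum is reloaded each time
    s += a[k].sum; // after the patch: ld.d from the a + 24*k base, imm 8
  }
  a[k].sum = s;    // after the patch: st.d to the same base, imm 8
}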