Skip to content

Commit

Permalink
[RISCV] Add a rematerializable pseudo instruction for LUI+ADDI for global addresses. (#93352)
Browse files Browse the repository at this point in the history

This allows register allocation to rematerialize these instead of
spilling and reloading. We need to make it a single instruction due to
limitations in rematerialization.

This pseudo is expanded to an LUI+ADDI pair between regalloc and post RA
scheduling.

This improves the dynamic instruction count on 531.deepsjeng_r from
spec2017 by 3.2% for the train dataset. 500.perlbench and 502.gcc see a
1% improvement. There are a couple of regressions, but they are 0.1% or
smaller.

AArch64 has similar pseudo instructions, such as MOVaddr.
  • Loading branch information
topperc authored May 28, 2024
1 parent 00bd2fa commit 2d00c6f
Show file tree
Hide file tree
Showing 19 changed files with 358 additions and 311 deletions.
20 changes: 20 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,26 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),

/// HI and ADD_LO address nodes.

// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
// It will be expanded after register allocation.
// FIXME: The scheduling information does not reflect the multiple instructions.
let Size = 8, isReMaterializable = 1 in
def PseudoMovAddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>,
Sched<[WriteIALU]>;

// A riscv_hi node that matches only when it has a single use. This keeps a
// %hi shared by several %lo additions from being folded into one
// PseudoMovAddr while other users still need the standalone LUI result.
def riscv_hi_oneuse : unop_oneuse<riscv_hi>;
// The canonical two-instruction address materialization shape:
// (add_lo (hi $hi), $lo).
def addr_hi_lo : PatFrag<(ops node:$hi, node:$lo),
(riscv_add_lo (riscv_hi_oneuse node:$hi), node:$lo)>;

// Select the fused, rematerializable pseudo for each flavor of symbolic
// address operand (global, blockaddress, jumptable, constant pool).
def : Pat<(addr_hi_lo tglobaladdr:$hi, tglobaladdr:$lo),
(PseudoMovAddr tglobaladdr:$hi, tglobaladdr:$lo)>;
def : Pat<(addr_hi_lo tblockaddress:$hi, tblockaddress:$lo),
(PseudoMovAddr tblockaddress:$hi, tblockaddress:$lo)>;
def : Pat<(addr_hi_lo tjumptable:$hi, tjumptable:$lo),
(PseudoMovAddr tjumptable:$hi, tjumptable:$lo)>;
def : Pat<(addr_hi_lo tconstpool:$hi, tconstpool:$lo),
(PseudoMovAddr tconstpool:$hi, tconstpool:$lo)>;

def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;
Expand Down
35 changes: 27 additions & 8 deletions llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
// 3) The offset value in the Global Address or Constant Pool is 0.
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
MachineInstr *&Lo) {
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
Hi.getOpcode() != RISCV::PseudoMovAddr)
return false;

const MachineOperand &HiOp1 = Hi.getOperand(1);
Expand All @@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
HiOp1.getOffset() != 0)
return false;

Register HiDestReg = Hi.getOperand(0).getReg();
if (!MRI->hasOneUse(HiDestReg))
return false;
if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
// Most of the code should handle it correctly without modification by
// making both Lo and Hi point to the same PseudoMovAddr.
Lo = &Hi;
} else {
Register HiDestReg = Hi.getOperand(0).getReg();
if (!MRI->hasOneUse(HiDestReg))
return false;

Lo = &*MRI->use_instr_begin(HiDestReg);
if (Lo->getOpcode() != RISCV::ADDI)
return false;
Lo = &*MRI->use_instr_begin(HiDestReg);
if (Lo->getOpcode() != RISCV::ADDI)
return false;
}

const MachineOperand &LoOp2 = Lo->getOperand(2);
if (Hi.getOpcode() == RISCV::LUI) {
if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoMovAddr) {
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
!(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
LoOp2.getOffset() != 0)
Expand Down Expand Up @@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,

Hi.getOperand(1).setOffset(NewOffset);
MachineOperand &ImmOp = Lo.getOperand(2);
// Expand PseudoMovAddr into LUI
if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
auto *TII = ST->getInstrInfo();
Hi.setDesc(TII->get(RISCV::LUI));
Hi.removeOperand(2);
}

if (Hi.getOpcode() != RISCV::AUIPC)
ImmOp.setOffset(NewOffset);

Expand Down Expand Up @@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
}
}

// Prevent Lo (originally PseudoMovAddr, which Hi also points to) from
// being erased.
if (&Lo == &Hi)
return true;

MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
Lo.eraseFromParent();
return true;
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandMovAddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
};

char RISCVPostRAExpandPseudo::ID = 0;
Expand Down Expand Up @@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
switch (MBBI->getOpcode()) {
case RISCV::PseudoMovImm:
return expandMovImm(MBB, MBBI);
case RISCV::PseudoMovAddr:
return expandMovAddr(MBB, MBBI);
default:
return false;
}
Expand All @@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
return true;
}

bool RISCVPostRAExpandPseudo::expandMovAddr(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI) {
  // Split the post-RA PseudoMovAddr placeholder back into its real LUI+ADDI
  // pair, carrying the dead/renamable state of the destination operand over
  // to the expanded instructions.
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  const MachineOperand &DstOp = MI.getOperand(0);
  Register Dst = DstOp.getReg();
  unsigned RenamableState = getRenamableRegState(DstOp.isRenamable());

  // LUI materializes the %hi piece into the destination register.
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::LUI))
      .addReg(Dst, RegState::Define | RenamableState)
      .add(MI.getOperand(1));
  // ADDI folds in the %lo piece; it reads and kills the LUI result, and the
  // final def inherits the pseudo's dead flag.
  BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI))
      .addReg(Dst, RegState::Define | getDeadRegState(DstOp.isDead()) |
                       RenamableState)
      .addReg(Dst, RegState::Kill | RenamableState)
      .add(MI.getOperand(2));
  MI.eraseFromParent();
  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
Expand Down Expand Up @@ -442,28 +442,28 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV32M-LABEL: test_cttz_i64:
; RV32M: # %bb.0:
; RV32M-NEXT: lui a2, 30667
; RV32M-NEXT: addi a2, a2, 1329
; RV32M-NEXT: lui a3, %hi(.LCPI3_0)
; RV32M-NEXT: addi a3, a3, %lo(.LCPI3_0)
; RV32M-NEXT: addi a3, a2, 1329
; RV32M-NEXT: lui a2, %hi(.LCPI3_0)
; RV32M-NEXT: addi a2, a2, %lo(.LCPI3_0)
; RV32M-NEXT: bnez a1, .LBB3_3
; RV32M-NEXT: # %bb.1:
; RV32M-NEXT: li a1, 32
; RV32M-NEXT: beqz a0, .LBB3_4
; RV32M-NEXT: .LBB3_2:
; RV32M-NEXT: neg a1, a0
; RV32M-NEXT: and a0, a0, a1
; RV32M-NEXT: mul a0, a0, a2
; RV32M-NEXT: mul a0, a0, a3
; RV32M-NEXT: srli a0, a0, 27
; RV32M-NEXT: add a0, a3, a0
; RV32M-NEXT: add a0, a2, a0
; RV32M-NEXT: lbu a0, 0(a0)
; RV32M-NEXT: li a1, 0
; RV32M-NEXT: ret
; RV32M-NEXT: .LBB3_3:
; RV32M-NEXT: neg a4, a1
; RV32M-NEXT: and a1, a1, a4
; RV32M-NEXT: mul a1, a1, a2
; RV32M-NEXT: mul a1, a1, a3
; RV32M-NEXT: srli a1, a1, 27
; RV32M-NEXT: add a1, a3, a1
; RV32M-NEXT: add a1, a2, a1
; RV32M-NEXT: lbu a1, 0(a1)
; RV32M-NEXT: bnez a0, .LBB3_2
; RV32M-NEXT: .LBB3_4:
Expand Down Expand Up @@ -814,8 +814,8 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a0, %hi(.LCPI7_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI7_0)
; RV32I-NEXT: lui s4, %hi(.LCPI7_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI7_0)
; RV32I-NEXT: neg a0, s1
; RV32I-NEXT: and a0, s1, a0
; RV32I-NEXT: mv a1, s3
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ define signext i32 @ctz_dereferencing_pointer(ptr %b) nounwind {
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a0, %hi(.LCPI0_0)
; RV32I-NEXT: addi s3, a0, %lo(.LCPI0_0)
; RV32I-NEXT: lui s3, %hi(.LCPI0_0)
; RV32I-NEXT: addi s3, s3, %lo(.LCPI0_0)
; RV32I-NEXT: neg a0, s4
; RV32I-NEXT: and a0, s4, a0
; RV32I-NEXT: mv a1, s1
Expand Down Expand Up @@ -511,8 +511,8 @@ define signext i32 @ctz4(i64 %b) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, %hi(.LCPI6_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI6_0)
; RV32I-NEXT: lui s4, %hi(.LCPI6_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI6_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,31 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -389,8 +389,8 @@ define dso_local i32 @load_ga() local_unnamed_addr #0 {
define dso_local i64 @load_ga_8() nounwind {
; RV32I-LABEL: load_ga_8:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lui a0, %hi(ga_8)
; RV32I-NEXT: addi a1, a0, %lo(ga_8)
; RV32I-NEXT: lui a1, %hi(ga_8)
; RV32I-NEXT: addi a1, a1, %lo(ga_8)
; RV32I-NEXT: lw a0, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: ret
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rv32zbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -126,28 +126,28 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsaddu.vx v8, v8, a1
; CHECK-NEXT: vmsltu.vx v0, v8, a2
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vmsltu.vx v0, v8, a2
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsext.vf8 v24, v16
; CHECK-NEXT: vsaddu.vx v16, v24, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: lui a0, %hi(.LCPI9_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmsltu.vx v10, v16, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v8, 2
; CHECK-NEXT: vslideup.vi v0, v9, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v10, 4
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
Expand All @@ -169,13 +169,13 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v10, v16, a2
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: lui a0, %hi(.LCPI10_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vmsltu.vx v10, v16, a2
; CHECK-NEXT: lui a0, %hi(.LCPI10_3)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3)
; CHECK-NEXT: vle8.v v11, (a0)
Expand All @@ -187,10 +187,10 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vmsltu.vx v11, v16, a2
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v0, v16, a2
; CHECK-NEXT: lui a0, %hi(.LCPI10_4)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4)
; CHECK-NEXT: vle8.v v12, (a0)
; CHECK-NEXT: vmsltu.vx v0, v16, a2
; CHECK-NEXT: lui a0, %hi(.LCPI10_5)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5)
; CHECK-NEXT: vle8.v v13, (a0)
Expand All @@ -201,27 +201,27 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v13, v16, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslideup.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 4
; CHECK-NEXT: vslideup.vi v10, v9, 4
; CHECK-NEXT: lui a0, %hi(.LCPI10_6)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v11, 6
; CHECK-NEXT: vslideup.vi v10, v11, 6
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v12, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v13, 4
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v0, v9, 6
; CHECK-NEXT: vslideup.vi v0, v8, 6
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vi v0, v8, 8
; CHECK-NEXT: vslideup.vi v0, v10, 8
; CHECK-NEXT: ret
%mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
ret <128 x i1> %mask
Expand Down
Loading

0 comments on commit 2d00c6f

Please sign in to comment.