diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index e9f43b9a71648..84bb29433fb3b 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -438,18 +438,19 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs); Register VLENB = 0; - unsigned PreHandledNum = 0; + unsigned VLENBShift = 0; + unsigned PrevHandledNum = 0; unsigned I = 0; while (I != NumRegs) { auto [LMulHandled, RegClass, Opcode] = getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill); auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled); bool IsLast = I + RegNumHandled == NumRegs; - if (PreHandledNum) { + if (PrevHandledNum) { Register Step; // Optimize for constant VLEN. if (auto VLEN = STI.getRealVLen()) { - int64_t Offset = *VLEN / 8 * PreHandledNum; + int64_t Offset = *VLEN / 8 * PrevHandledNum; Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); } else { @@ -457,15 +458,21 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass); BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB); } - uint32_t ShiftAmount = Log2_32(PreHandledNum); - if (ShiftAmount == 0) - Step = VLENB; - else { - Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) - .addReg(VLENB, getKillRegState(IsLast)) - .addImm(ShiftAmount); + uint32_t ShiftAmount = Log2_32(PrevHandledNum); + // To avoid using an extra register, we shift the VLENB register and + // remember how much it has been shifted. We can then use relative + // shifts to adjust to the desired shift amount. + if (VLENBShift > ShiftAmount) { + BuildMI(MBB, II, DL, TII->get(RISCV::SRLI), VLENB) + .addReg(VLENB, RegState::Kill) + .addImm(VLENBShift - ShiftAmount); + } else if (VLENBShift < ShiftAmount) { + BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VLENB) + .addReg(VLENB, RegState::Kill) + .addImm(ShiftAmount - VLENBShift); } + VLENBShift = ShiftAmount; + Step = VLENB; } BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) @@ -489,7 +496,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, if (IsSpill) MIB.addReg(Reg, RegState::Implicit); - PreHandledNum = RegNumHandled; + PrevHandledNum = RegNumHandled; RegEncoding += RegNumHandled; I += RegNumHandled; } diff --git a/llvm/test/CodeGen/RISCV/rvv/pr165232.ll b/llvm/test/CodeGen/RISCV/rvv/pr165232.ll new file mode 100644 index 0000000000000..bef53c6a5ae62 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/pr165232.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +define i1 @main(ptr %var_117, ptr %arrayinit.element3045, ptr %arrayinit.element3047, ptr %arrayinit.element3049, ptr %arrayinit.element3051, ptr %arrayinit.element3053, ptr %arrayinit.element3055, ptr %arrayinit.element3057, ptr %arrayinit.element3059, ptr %arrayinit.element3061, ptr %arrayinit.element3063, ptr %arrayinit.element3065, ptr %arrayinit.element3067, i64 %var_94_i.07698, target("riscv.vector.tuple", , 2) %0, target("riscv.vector.tuple", , 4) %1) { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr t0, vlenb +; CHECK-NEXT: slli t0, t0, 3 +; CHECK-NEXT: mv t1, t0 +; CHECK-NEXT: slli t0, t0, 1 +; CHECK-NEXT: add t0, t0, t1 +; CHECK-NEXT: sub sp, sp, t0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: vs4r.v v16, (a1) # vscale x 32-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t0, 56(a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t1, 48(a1) +; CHECK-NEXT: vsetvli t2, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t2, 40(a1) +; CHECK-NEXT: # kill: def $v10 killed $v9 killed $vtype +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t3, 32(a1) +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t4, 16(a1) +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld t5, 24(a1) +; CHECK-NEXT: vmv.v.i v13, 0 +; CHECK-NEXT: vsetvli t6, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v22, 0 +; CHECK-NEXT: vmv1r.v v14, v9 +; CHECK-NEXT: sd zero, 0(a0) +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv1r.v v15, v9 +; CHECK-NEXT: vmv1r.v v18, v9 +; CHECK-NEXT: li t6, 1023 +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vmv1r.v v19, v9 +; CHECK-NEXT: slli t6, t6, 52 +; CHECK-NEXT: vmv.v.i v28, 0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs2r.v v22, (a1) # vscale x 16-byte Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: vs4r.v v24, (a1) # vscale x 32-byte Folded Spill +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: ld a2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: vs2r.v v28, (a1) # vscale x 16-byte Folded Spill +; CHECK-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: vmv1r.v v20, v9 +; CHECK-NEXT: sd t6, 0(t5) +; CHECK-NEXT: vmv2r.v v16, v14 +; CHECK-NEXT: vmv2r.v v14, v12 +; CHECK-NEXT: vmv2r.v v12, v10 +; CHECK-NEXT: vmv1r.v v11, v9 +; CHECK-NEXT: vmv1r.v v21, v9 +; CHECK-NEXT: csrr t5, vlenb +; CHECK-NEXT: slli t5, t5, 3 +; CHECK-NEXT: add t5, sp, t5 +; CHECK-NEXT: addi t5, t5, 16 +; CHECK-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill +; CHECK-NEXT: csrr t6, vlenb +; CHECK-NEXT: slli t6, t6, 1 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vs2r.v v20, (t5) # vscale x 16-byte Folded Spill +; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v19, 0 +; CHECK-NEXT: vmclr.m v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.i v6, 0 +; CHECK-NEXT: .LBB0_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmv1r.v v20, v19 +; CHECK-NEXT: vmv1r.v v3, v19 +; CHECK-NEXT: vmv1r.v v5, v19 +; CHECK-NEXT: vmv1r.v v2, v19 +; CHECK-NEXT: vmv1r.v v31, v19 +; CHECK-NEXT: vmv1r.v v30, v19 +; CHECK-NEXT: vmv1r.v v4, v19 +; CHECK-NEXT: vmv2r.v v22, v10 +; CHECK-NEXT: vmv4r.v v24, v12 +; CHECK-NEXT: vmv2r.v v28, v16 +; CHECK-NEXT: vmv2r.v v8, v6 +; CHECK-NEXT: vmv1r.v v18, v19 +; CHECK-NEXT: vmv1r.v v21, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma +; CHECK-NEXT: vle32.v v20, (t4) +; CHECK-NEXT: vle32.v v3, (t1) +; CHECK-NEXT: vle32.v v30, (a7) +; CHECK-NEXT: vle64.v v8, (a4) +; CHECK-NEXT: vle32.v v5, (t2) +; CHECK-NEXT: vle32.v v2, (t3) +; CHECK-NEXT: vle32.v v31, (a6) +; CHECK-NEXT: vmv1r.v v24, v30 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vmflt.vv v21, v8, v6, v0.t +; CHECK-NEXT: vmv1r.v v8, v19 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu +; CHECK-NEXT: vle32.v v18, (a2) +; CHECK-NEXT: vle32.v v8, (a3) +; CHECK-NEXT: vle32.v v4, (a5) +; CHECK-NEXT: vmv1r.v v22, v20 +; CHECK-NEXT: csrr t5, vlenb +; CHECK-NEXT: slli t5, t5, 3 +; CHECK-NEXT: add t5, sp, t5 +; CHECK-NEXT: addi t5, t5, 16 +; CHECK-NEXT: vl1r.v v1, (t5) # vscale x 8-byte Folded Reload +; CHECK-NEXT: csrr t6, vlenb +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vl2r.v v2, (t5) # vscale x 16-byte Folded Reload +; CHECK-NEXT: slli t6, t6, 1 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vl1r.v v4, (t5) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vsseg4e32.v v1, (zero) +; CHECK-NEXT: vsseg8e32.v v22, (a1) +; CHECK-NEXT: vmv1r.v v0, v21 +; CHECK-NEXT: vssub.vv v8, v19, v18, v0.t +; CHECK-NEXT: csrr t5, vlenb +; CHECK-NEXT: slli t5, t5, 2 +; CHECK-NEXT: mv t6, t5 +; CHECK-NEXT: slli t5, t5, 1 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: add t5, sp, t5 +; CHECK-NEXT: addi t5, t5, 16 +; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: vsetvli zero, t0, e64, m2, ta, ma +; CHECK-NEXT: vsseg2e64.v v20, (zero) +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: addi t5, sp, 16 +; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: csrr t6, vlenb +; CHECK-NEXT: slli t6, t6, 2 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vl4r.v v24, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: vsetivli zero, 0, e64, m2, ta, ma +; CHECK-NEXT: vsseg4e64.v v20, (zero), v0.t +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vsseg8e32.v v8, (a0) +; CHECK-NEXT: csrr t5, vlenb +; CHECK-NEXT: slli t5, t5, 4 +; CHECK-NEXT: add t5, sp, t5 +; CHECK-NEXT: addi t5, t5, 16 +; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: csrr t6, vlenb +; CHECK-NEXT: slli t6, t6, 2 +; CHECK-NEXT: add t5, t5, t6 +; CHECK-NEXT: vl4r.v v24, (t5) # vscale x 32-byte Folded Reload +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vsseg4e64.v v20, (zero) +; CHECK-NEXT: j .LBB0_1 +entry: + store double 0.000000e+00, ptr %var_117, align 8 + store double 1.000000e+00, ptr %arrayinit.element3061, align 8 + br label %for.body + +for.body: ; preds = %for.body, %entry + %2 = call @llvm.riscv.vle.nxv2f32.p0.i64( zeroinitializer, ptr %arrayinit.element3059, i64 0) + %3 = call @llvm.riscv.vle.nxv2f32.p0.i64( zeroinitializer, ptr %arrayinit.element3067, i64 0) + %4 = call @llvm.riscv.vle.nxv2f32.p0.i64( zeroinitializer, ptr %arrayinit.element3065, i64 0) + %5 = call @llvm.riscv.vle.nxv2f32.p0.i64( zeroinitializer, ptr %arrayinit.element3063, i64 0) + %6 = call @llvm.riscv.vle.nxv2f32.p0.i64( zeroinitializer, ptr %arrayinit.element3055, i64 0) + %7 = call @llvm.riscv.vle.nxv2f32.p0.i64( zeroinitializer, ptr %arrayinit.element3057, i64 0) + %8 = call @llvm.riscv.vle.nxv2f32.p0.i64( zeroinitializer, ptr %arrayinit.element3053, i64 0) + %9 = call @llvm.riscv.vle.nxv2f64.p0.i64( zeroinitializer, ptr %arrayinit.element3051, i64 0) + %10 = tail call @llvm.riscv.vle.nxv2i32.p0.i64( zeroinitializer, ptr %arrayinit.element3047, i64 0) + %11 = tail call @llvm.riscv.vle.nxv2i32.p0.i64( zeroinitializer, ptr %arrayinit.element3049, i64 0) + call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i64(target("riscv.vector.tuple", , 4) zeroinitializer, ptr null, i64 0, i64 5) + %12 = tail call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", , 8) zeroinitializer, %8, i32 0) + %13 = tail call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", , 8) %12, %7, i32 2) + %14 = tail call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", , 8) %13, %6, i32 0) + %15 = tail call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", , 8) %14, %5, i32 0) + %16 = tail call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", , 8) %15, %4, i32 0) + %17 = tail call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", , 8) %16, %3, i32 0) + %18 = tail call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", , 8) %17, %2, i32 0) + call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", , 8) %18, ptr %arrayinit.element3045, i64 0, i64 5) + %19 = tail call @llvm.riscv.vmfgt.mask.nxv2f64.nxv2f64.i64( zeroinitializer, zeroinitializer, %9, zeroinitializer, i64 0) + %20 = tail call @llvm.riscv.vssub.mask.nxv2i32.nxv2i32.i64( %11, zeroinitializer, %10, %19, i64 0, i64 0) + call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv16i8_2t.p0.i64(target("riscv.vector.tuple", , 2) %0, ptr null, i64 %var_94_i.07698, i64 6) + call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv2i1.i64(target("riscv.vector.tuple", , 4) zeroinitializer, ptr null, zeroinitializer, i64 0, i64 6) + %21 = tail call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2i32(target("riscv.vector.tuple", , 8) poison, %20, i32 0) + call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", , 8) %21, ptr %var_117, i64 0, i64 5) + call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv16i8_4t.p0.i64(target("riscv.vector.tuple", , 4) %1, ptr null, i64 0, i64 6) + br label %for.body +} diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir index dd9960d17af43..9c2fa9d0009a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -32,10 +32,10 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: VS4R_V $v0m4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0, align 8) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x13 = SLLI $x12, 2 - ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13 + ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 + ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 ; CHECK-NEXT: VS2R_V $v4m2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0, align 8) - ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 + ; CHECK-NEXT: $x12 = SRLI killed $x12, 1 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 ; CHECK-NEXT: VS1R_V $v6, killed $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0) ; CHECK-NEXT: $x11 = ADDI $x2, 16 @@ -93,10 +93,10 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: $v10m2 = VL2RE8_V $x11 :: (load () from %stack.0, align 8) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x13 = SLLI $x12, 1 - ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13 + ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 + ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 ; CHECK-NEXT: $v12m4 = VL4RE8_V $x11 :: (load () from %stack.0, align 8) - ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 + ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 ; CHECK-NEXT: $v16 = VL1RE8_V killed $x11 :: (load () from %stack.0) ; CHECK-NEXT: VS1R_V killed $v10, killed renamable $x10