diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index fd5a5244486ab..4d5fa79389ea6 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -44,6 +44,7 @@ add_llvm_target(RISCVCodeGen RISCVMacroFusion.cpp RISCVMergeBaseOffset.cpp RISCVOptWInstrs.cpp + RISCVPostRAExpandPseudoInsts.cpp RISCVRedundantCopyElimination.cpp RISCVMoveMerger.cpp RISCVPushPopOptimizer.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 0efc915ea52c5..3d8e33dc716ea 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -63,6 +63,8 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &); FunctionPass *createRISCVInsertVSETVLIPass(); void initializeRISCVInsertVSETVLIPass(PassRegistry &); +FunctionPass *createRISCVPostRAExpandPseudoPass(); +void initializeRISCVPostRAExpandPseudoPass(PassRegistry &); FunctionPass *createRISCVInsertReadWriteCSRPass(); void initializeRISCVInsertReadWriteCSRPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 81a1304cf1f40..6c156057ccd7d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -29,6 +29,12 @@ using namespace llvm; #define DEBUG_TYPE "riscv-isel" #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection" +static cl::opt<bool> UsePseudoMovImm( + "riscv-use-rematerializable-movimm", cl::Hidden, + cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " + "constant materialization"), + cl::init(false)); + namespace llvm::RISCV { #define GET_RISCVVSSEGTable_IMPL #define GET_RISCVVLSEGTable_IMPL @@ -195,6 +201,13 @@ static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); + // Use a rematerializable pseudo instruction for short sequences if enabled. 
+ if (Seq.size() == 2 && UsePseudoMovImm) + return SDValue( + CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT, + CurDAG->getTargetConstant(Imm, DL, VT)), + 0); + // See if we can create this constant as (ADD (SLLI X, C), X) where X is at // worst an LUI+ADDIW. This will require an extra register, but avoids a // constant pool. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 460f43bf60a25..1a9242cff0b44 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1664,6 +1664,16 @@ def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), [], "jump", "$target, $rd">, Sched<[WriteIALU, WriteJalr, ReadJalr]>; +// Pseudo for a rematerializable constant materialization sequence. +// This is an experimental feature enabled by +// -riscv-use-rematerializable-movimm in RISCVISelDAGToDAG.cpp +// It will be expanded after register allocation. +// FIXME: The scheduling information does not reflect the multiple instructions. +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 1, + isPseudo = 1, isReMaterializable = 1, IsSignExtendingOpW = 1 in +def PseudoMovImm : Pseudo<(outs GPR:$dst), (ins i32imm:$imm), []>, + Sched<[WriteIALU]>; + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0, isAsmParserOnly = 1 in def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], diff --git a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp new file mode 100644 index 0000000000000..407e7cfd6fef8 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp @@ -0,0 +1,155 @@ +//===-- RISCVPostRAExpandPseudoInsts.cpp - Expand pseudo instrs ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands the pseudo instruction pseudolisimm32 +// into target instructions. This pass should be run during the post-regalloc +// passes, before post RA scheduling. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/RISCVMatInt.h" +#include "RISCV.h" +#include "RISCVInstrInfo.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +#define RISCV_POST_RA_EXPAND_PSEUDO_NAME \ + "RISC-V post-regalloc pseudo instruction expansion pass" + +namespace { + +class RISCVPostRAExpandPseudo : public MachineFunctionPass { +public: + const RISCVInstrInfo *TII; + static char ID; + + RISCVPostRAExpandPseudo() : MachineFunctionPass(ID) { + initializeRISCVPostRAExpandPseudoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return RISCV_POST_RA_EXPAND_PSEUDO_NAME; + } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); +}; + +char RISCVPostRAExpandPseudo::ID = 0; + +bool RISCVPostRAExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo()); + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + return Modified; +} + +bool RISCVPostRAExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, 
MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + switch (MBBI->getOpcode()) { + case RISCV::PseudoMovImm: + return expandMovImm(MBB, MBBI); + default: + return false; + } +} + +bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + + int64_t Val = MBBI->getOperand(1).getImm(); + + RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq( + Val, MBB.getParent()->getSubtarget().getFeatureBits()); + assert(!Seq.empty()); + + Register SrcReg = RISCV::X0; + Register DstReg = MBBI->getOperand(0).getReg(); + bool DstIsDead = MBBI->getOperand(0).isDead(); + bool Renamable = MBBI->getOperand(0).isRenamable(); + bool SrcRenamable = false; + unsigned Num = 0; + + for (RISCVMatInt::Inst &Inst : Seq) { + bool LastItem = ++Num == Seq.size(); + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + BuildMI(MBB, MBBI, DL, TII->get(Inst.getOpcode())) + .addReg(DstReg, RegState::Define | + getDeadRegState(DstIsDead && LastItem) | + getRenamableRegState(Renamable)) + .addImm(Inst.getImm()); + break; + case RISCVMatInt::RegX0: + BuildMI(MBB, MBBI, DL, TII->get(Inst.getOpcode())) + .addReg(DstReg, RegState::Define | + getDeadRegState(DstIsDead && LastItem) | + getRenamableRegState(Renamable)) + .addReg(SrcReg, RegState::Kill | getRenamableRegState(SrcRenamable)) + .addReg(RISCV::X0); + break; + case RISCVMatInt::RegReg: + BuildMI(MBB, MBBI, DL, TII->get(Inst.getOpcode())) + .addReg(DstReg, RegState::Define | + getDeadRegState(DstIsDead && LastItem) | + getRenamableRegState(Renamable)) + .addReg(SrcReg, RegState::Kill | getRenamableRegState(SrcRenamable)) + .addReg(SrcReg, RegState::Kill | getRenamableRegState(SrcRenamable)); + break; + case RISCVMatInt::RegImm: + BuildMI(MBB, MBBI, DL, TII->get(Inst.getOpcode())) + .addReg(DstReg, 
RegState::Define | + getDeadRegState(DstIsDead && LastItem) | + getRenamableRegState(Renamable)) + .addReg(SrcReg, RegState::Kill | getRenamableRegState(SrcRenamable)) + .addImm(Inst.getImm()); + break; + } + // Only the first instruction has X0 as its source. + SrcReg = DstReg; + SrcRenamable = Renamable; + } + MBBI->eraseFromParent(); + return true; +} + +} // end of anonymous namespace + +INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32", + RISCV_POST_RA_EXPAND_PSEUDO_NAME, false, false) +namespace llvm { + +FunctionPass *createRISCVPostRAExpandPseudoPass() { + return new RISCVPostRAExpandPseudo(); +} + +} // end of namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 651d24bae5726..953ac097b9150 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -96,6 +96,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVMakeCompressibleOptPass(*PR); initializeRISCVGatherScatterLoweringPass(*PR); initializeRISCVCodeGenPreparePass(*PR); + initializeRISCVPostRAExpandPseudoPass(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVOptWInstrsPass(*PR); initializeRISCVPreRAExpandPseudoPass(*PR); @@ -372,6 +373,8 @@ bool RISCVPassConfig::addGlobalInstructionSelect() { } void RISCVPassConfig::addPreSched2() { + addPass(createRISCVPostRAExpandPseudoPass()); + // Emit KCFI checks for indirect calls. 
addPass(createKCFIPass()); } diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index 01c7613201854..1d9af9df2f718 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -52,6 +52,7 @@ ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization ; CHECK-NEXT: Post-RA pseudo instruction expansion pass +; CHECK-NEXT: RISC-V post-regalloc pseudo instruction expansion pass ; CHECK-NEXT: Insert KCFI indirect call checks ; CHECK-NEXT: Analyze Machine Code For Garbage Collection ; CHECK-NEXT: Insert fentry calls diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 30b6e1e541394..cf0826096bd41 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -156,6 +156,7 @@ ; CHECK-NEXT: Tail Duplication ; CHECK-NEXT: Machine Copy Propagation Pass ; CHECK-NEXT: Post-RA pseudo instruction expansion pass +; CHECK-NEXT: RISC-V post-regalloc pseudo instruction expansion pass ; CHECK-NEXT: Insert KCFI indirect call checks ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll index e191933b42338..cafcf72c022ff 100644 --- a/llvm/test/CodeGen/RISCV/imm.ll +++ b/llvm/test/CodeGen/RISCV/imm.ll @@ -14,6 +14,11 @@ ; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+xtheadbb \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IXTHEADBB +; RUN: llc -mtriple=riscv32 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \ +; RUN: -riscv-use-rematerializable-movimm | FileCheck %s -check-prefix=RV32-REMAT +; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \ +; RUN: -riscv-use-rematerializable-movimm | FileCheck %s 
-check-prefix=RV64-REMAT + ; Materializing constants ; TODO: It would be preferable if anyext constant returns were sign rather @@ -50,6 +55,16 @@ define signext i32 @zero() nounwind { ; RV64IXTHEADBB: # %bb.0: ; RV64IXTHEADBB-NEXT: li a0, 0 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: zero: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a0, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: zero: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, 0 +; RV64-REMAT-NEXT: ret ret i32 0 } @@ -83,6 +98,16 @@ define signext i32 @pos_small() nounwind { ; RV64IXTHEADBB: # %bb.0: ; RV64IXTHEADBB-NEXT: li a0, 2047 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: pos_small: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a0, 2047 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: pos_small: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, 2047 +; RV64-REMAT-NEXT: ret ret i32 2047 } @@ -116,6 +141,16 @@ define signext i32 @neg_small() nounwind { ; RV64IXTHEADBB: # %bb.0: ; RV64IXTHEADBB-NEXT: li a0, -2048 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: neg_small: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a0, -2048 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: neg_small: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -2048 +; RV64-REMAT-NEXT: ret ret i32 -2048 } @@ -155,6 +190,18 @@ define signext i32 @pos_i32() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 423811 ; RV64IXTHEADBB-NEXT: addiw a0, a0, -1297 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: pos_i32: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 423811 +; RV32-REMAT-NEXT: addi a0, a0, -1297 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: pos_i32: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 423811 +; RV64-REMAT-NEXT: addiw a0, a0, -1297 +; RV64-REMAT-NEXT: ret ret i32 1735928559 } @@ -194,6 +241,18 @@ define signext i32 @neg_i32() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 912092 ; RV64IXTHEADBB-NEXT: addiw a0, a0, -273 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: neg_i32: +; RV32-REMAT: 
# %bb.0: +; RV32-REMAT-NEXT: lui a0, 912092 +; RV32-REMAT-NEXT: addi a0, a0, -273 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: neg_i32: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 912092 +; RV64-REMAT-NEXT: addiw a0, a0, -273 +; RV64-REMAT-NEXT: ret ret i32 -559038737 } @@ -227,6 +286,16 @@ define signext i32 @pos_i32_hi20_only() nounwind { ; RV64IXTHEADBB: # %bb.0: ; RV64IXTHEADBB-NEXT: lui a0, 16 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: pos_i32_hi20_only: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 16 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: pos_i32_hi20_only: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 16 +; RV64-REMAT-NEXT: ret ret i32 65536 ; 0x10000 } @@ -260,6 +329,16 @@ define signext i32 @neg_i32_hi20_only() nounwind { ; RV64IXTHEADBB: # %bb.0: ; RV64IXTHEADBB-NEXT: lui a0, 1048560 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: neg_i32_hi20_only: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 1048560 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: neg_i32_hi20_only: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1048560 +; RV64-REMAT-NEXT: ret ret i32 -65536 ; -0x10000 } @@ -301,6 +380,18 @@ define signext i32 @imm_left_shifted_addi() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 32 ; RV64IXTHEADBB-NEXT: addiw a0, a0, -64 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_left_shifted_addi: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 32 +; RV32-REMAT-NEXT: addi a0, a0, -64 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_left_shifted_addi: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 32 +; RV64-REMAT-NEXT: addiw a0, a0, -64 +; RV64-REMAT-NEXT: ret ret i32 131008 ; 0x1FFC0 } @@ -342,6 +433,18 @@ define signext i32 @imm_right_shifted_addi() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 524288 ; RV64IXTHEADBB-NEXT: addiw a0, a0, -1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_right_shifted_addi: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 524288 +; RV32-REMAT-NEXT: addi a0, 
a0, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_right_shifted_addi: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 524288 +; RV64-REMAT-NEXT: addiw a0, a0, -1 +; RV64-REMAT-NEXT: ret ret i32 2147483647 ; 0x7FFFFFFF } @@ -383,6 +486,18 @@ define signext i32 @imm_right_shifted_lui() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 56 ; RV64IXTHEADBB-NEXT: addiw a0, a0, 580 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_right_shifted_lui: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 56 +; RV32-REMAT-NEXT: addi a0, a0, 580 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_right_shifted_lui: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 56 +; RV64-REMAT-NEXT: addiw a0, a0, 580 +; RV64-REMAT-NEXT: ret ret i32 229956 ; 0x38244 } @@ -421,6 +536,18 @@ define i64 @imm64_1() nounwind { ; RV64IXTHEADBB-NEXT: li a0, 1 ; RV64IXTHEADBB-NEXT: slli a0, a0, 31 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 524288 +; RV32-REMAT-NEXT: li a1, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, 1 +; RV64-REMAT-NEXT: slli a0, a0, 31 +; RV64-REMAT-NEXT: ret ret i64 2147483648 ; 0x8000_0000 } @@ -460,6 +587,18 @@ define i64 @imm64_2() nounwind { ; RV64IXTHEADBB-NEXT: li a0, -1 ; RV64IXTHEADBB-NEXT: srli a0, a0, 32 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_2: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a0, -1 +; RV32-REMAT-NEXT: li a1, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_2: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -1 +; RV64-REMAT-NEXT: srli a0, a0, 32 +; RV64-REMAT-NEXT: ret ret i64 4294967295 ; 0xFFFF_FFFF } @@ -498,6 +637,18 @@ define i64 @imm64_3() nounwind { ; RV64IXTHEADBB-NEXT: li a0, 1 ; RV64IXTHEADBB-NEXT: slli a0, a0, 32 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_3: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a1, 1 +; RV32-REMAT-NEXT: li a0, 0 +; RV32-REMAT-NEXT: ret +; 
+; RV64-REMAT-LABEL: imm64_3: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, 1 +; RV64-REMAT-NEXT: slli a0, a0, 32 +; RV64-REMAT-NEXT: ret ret i64 4294967296 ; 0x1_0000_0000 } @@ -536,6 +687,18 @@ define i64 @imm64_4() nounwind { ; RV64IXTHEADBB-NEXT: li a0, -1 ; RV64IXTHEADBB-NEXT: slli a0, a0, 63 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_4: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a1, 524288 +; RV32-REMAT-NEXT: li a0, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_4: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -1 +; RV64-REMAT-NEXT: slli a0, a0, 63 +; RV64-REMAT-NEXT: ret ret i64 9223372036854775808 ; 0x8000_0000_0000_0000 } @@ -574,6 +737,18 @@ define i64 @imm64_5() nounwind { ; RV64IXTHEADBB-NEXT: li a0, -1 ; RV64IXTHEADBB-NEXT: slli a0, a0, 63 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_5: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a1, 524288 +; RV32-REMAT-NEXT: li a0, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_5: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -1 +; RV64-REMAT-NEXT: slli a0, a0, 63 +; RV64-REMAT-NEXT: ret ret i64 -9223372036854775808 ; 0x8000_0000_0000_0000 } @@ -619,6 +794,20 @@ define i64 @imm64_6() nounwind { ; RV64IXTHEADBB-NEXT: addi a0, a0, -1329 ; RV64IXTHEADBB-NEXT: slli a0, a0, 35 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_6: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a1, 74565 +; RV32-REMAT-NEXT: addi a1, a1, 1656 +; RV32-REMAT-NEXT: li a0, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_6: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 9321 +; RV64-REMAT-NEXT: addi a0, a0, -1329 +; RV64-REMAT-NEXT: slli a0, a0, 35 +; RV64-REMAT-NEXT: ret ret i64 1311768464867721216 ; 0x1234_5678_0000_0000 } @@ -674,6 +863,22 @@ define i64 @imm64_7() nounwind { ; RV64IXTHEADBB-NEXT: slli a0, a0, 24 ; RV64IXTHEADBB-NEXT: addi a0, a0, 15 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_7: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 
45056 +; RV32-REMAT-NEXT: addi a0, a0, 15 +; RV32-REMAT-NEXT: lui a1, 458752 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_7: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, 7 +; RV64-REMAT-NEXT: slli a0, a0, 36 +; RV64-REMAT-NEXT: addi a0, a0, 11 +; RV64-REMAT-NEXT: slli a0, a0, 24 +; RV64-REMAT-NEXT: addi a0, a0, 15 +; RV64-REMAT-NEXT: ret ret i64 8070450532432478223 ; 0x7000_0000_0B00_000F } @@ -752,6 +957,26 @@ define i64 @imm64_8() nounwind { ; RV64IXTHEADBB-NEXT: slli a0, a0, 13 ; RV64IXTHEADBB-NEXT: addi a0, a0, -272 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_8: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 633806 +; RV32-REMAT-NEXT: addi a0, a0, -272 +; RV32-REMAT-NEXT: lui a1, 74565 +; RV32-REMAT-NEXT: addi a1, a1, 1656 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_8: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 583 +; RV64-REMAT-NEXT: addiw a0, a0, -1875 +; RV64-REMAT-NEXT: slli a0, a0, 14 +; RV64-REMAT-NEXT: addi a0, a0, -947 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1511 +; RV64-REMAT-NEXT: slli a0, a0, 13 +; RV64-REMAT-NEXT: addi a0, a0, -272 +; RV64-REMAT-NEXT: ret ret i64 1311768467463790320 ; 0x1234_5678_9ABC_DEF0 } @@ -786,6 +1011,17 @@ define i64 @imm64_9() nounwind { ; RV64IXTHEADBB: # %bb.0: ; RV64IXTHEADBB-NEXT: li a0, -1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_9: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a0, -1 +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_9: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -1 +; RV64-REMAT-NEXT: ret ret i64 -1 } @@ -828,6 +1064,18 @@ define i64 @imm_left_shifted_lui_1() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 262145 ; RV64IXTHEADBB-NEXT: slli a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_left_shifted_lui_1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 524290 +; RV32-REMAT-NEXT: li a1, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: 
imm_left_shifted_lui_1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 262145 +; RV64-REMAT-NEXT: slli a0, a0, 1 +; RV64-REMAT-NEXT: ret ret i64 2147491840 ; 0x8000_2000 } @@ -867,6 +1115,18 @@ define i64 @imm_left_shifted_lui_2() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 262145 ; RV64IXTHEADBB-NEXT: slli a0, a0, 2 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_left_shifted_lui_2: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 4 +; RV32-REMAT-NEXT: li a1, 1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_left_shifted_lui_2: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 262145 +; RV64-REMAT-NEXT: slli a0, a0, 2 +; RV64-REMAT-NEXT: ret ret i64 4294983680 ; 0x1_0000_4000 } @@ -907,6 +1167,19 @@ define i64 @imm_left_shifted_lui_3() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 4097 ; RV64IXTHEADBB-NEXT: slli a0, a0, 20 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_left_shifted_lui_3: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a1, 1 +; RV32-REMAT-NEXT: addi a1, a1, 1 +; RV32-REMAT-NEXT: li a0, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_left_shifted_lui_3: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 4097 +; RV64-REMAT-NEXT: slli a0, a0, 20 +; RV64-REMAT-NEXT: ret ret i64 17596481011712 ; 0x1001_0000_0000 } @@ -951,6 +1224,20 @@ define i64 @imm_right_shifted_lui_1() nounwind { ; RV64IXTHEADBB-NEXT: lui a0, 983056 ; RV64IXTHEADBB-NEXT: srli a0, a0, 16 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_right_shifted_lui_1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 1048575 +; RV32-REMAT-NEXT: addi a0, a0, 1 +; RV32-REMAT-NEXT: lui a1, 16 +; RV32-REMAT-NEXT: addi a1, a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_right_shifted_lui_1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 983056 +; RV64-REMAT-NEXT: srli a0, a0, 16 +; RV64-REMAT-NEXT: ret ret i64 281474976706561 ; 0xFFFF_FFFF_F001 } @@ -996,6 +1283,20 @@ define i64 @imm_right_shifted_lui_2() nounwind { ; RV64IXTHEADBB-NEXT: slli a0, a0, 
12 ; RV64IXTHEADBB-NEXT: srli a0, a0, 24 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_right_shifted_lui_2: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 1048575 +; RV32-REMAT-NEXT: addi a0, a0, 1 +; RV32-REMAT-NEXT: li a1, 255 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_right_shifted_lui_2: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1044481 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: srli a0, a0, 24 +; RV64-REMAT-NEXT: ret ret i64 1099511623681 ; 0xFF_FFFF_F001 } @@ -1043,6 +1344,19 @@ define i64 @imm_decoupled_lui_addi() nounwind { ; RV64IXTHEADBB-NEXT: slli a0, a0, 20 ; RV64IXTHEADBB-NEXT: addi a0, a0, -3 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_decoupled_lui_addi: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a0, -3 +; RV32-REMAT-NEXT: lui a1, 1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_decoupled_lui_addi: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 4097 +; RV64-REMAT-NEXT: slli a0, a0, 20 +; RV64-REMAT-NEXT: addi a0, a0, -3 +; RV64-REMAT-NEXT: ret ret i64 17596481011709 ; 0x1000_FFFF_FFFD } @@ -1090,6 +1404,20 @@ define i64 @imm_end_xori_1() nounwind { ; RV64IXTHEADBB-NEXT: srli a0, a0, 3 ; RV64IXTHEADBB-NEXT: not a0, a0 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_end_xori_1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 8192 +; RV32-REMAT-NEXT: addi a0, a0, -1 +; RV32-REMAT-NEXT: lui a1, 917504 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_end_xori_1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 983040 +; RV64-REMAT-NEXT: srli a0, a0, 3 +; RV64-REMAT-NEXT: not a0, a0 +; RV64-REMAT-NEXT: ret ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF } @@ -1143,6 +1471,22 @@ define i64 @imm_end_2addi_1() nounwind { ; RV64IXTHEADBB-NEXT: addi a0, a0, -2048 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_end_2addi_1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 1048575 +; RV32-REMAT-NEXT: addi a0, a0, 2047 +; 
RV32-REMAT-NEXT: lui a1, 1048512 +; RV32-REMAT-NEXT: addi a1, a1, 127 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_end_2addi_1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -2047 +; RV64-REMAT-NEXT: slli a0, a0, 39 +; RV64-REMAT-NEXT: addi a0, a0, -2048 +; RV64-REMAT-NEXT: addi a0, a0, -1 +; RV64-REMAT-NEXT: ret ret i64 -1125350151030785 ; 0xFFFC_007F_FFFF_F7FF } @@ -1196,6 +1540,21 @@ define i64 @imm_2reg_1() nounwind { ; RV64IXTHEADBB-NEXT: slli a1, a0, 57 ; RV64IXTHEADBB-NEXT: add a0, a0, a1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_2reg_1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 74565 +; RV32-REMAT-NEXT: addi a0, a0, 1656 +; RV32-REMAT-NEXT: lui a1, 983040 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_2reg_1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 74565 +; RV64-REMAT-NEXT: addiw a0, a0, 1656 +; RV64-REMAT-NEXT: slli a1, a0, 57 +; RV64-REMAT-NEXT: add a0, a0, a1 +; RV64-REMAT-NEXT: ret ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678 } @@ -1236,6 +1595,18 @@ define void @imm_store_i16_neg1(ptr %p) nounwind { ; RV64IXTHEADBB-NEXT: li a1, -1 ; RV64IXTHEADBB-NEXT: sh a1, 0(a0) ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_store_i16_neg1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: sh a1, 0(a0) +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_store_i16_neg1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a1, -1 +; RV64-REMAT-NEXT: sh a1, 0(a0) +; RV64-REMAT-NEXT: ret store i16 -1, ptr %p ret void } @@ -1277,6 +1648,18 @@ define void @imm_store_i32_neg1(ptr %p) nounwind { ; RV64IXTHEADBB-NEXT: li a1, -1 ; RV64IXTHEADBB-NEXT: sw a1, 0(a0) ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_store_i32_neg1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: sw a1, 0(a0) +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_store_i32_neg1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a1, -1 +; RV64-REMAT-NEXT: sw a1, 0(a0) +; RV64-REMAT-NEXT: ret 
store i32 -1, ptr %p ret void } @@ -1326,6 +1709,21 @@ define i64 @imm_5372288229() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 13 ; RV64IXTHEADBB-NEXT: addi a0, a0, -795 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_5372288229: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 263018 +; RV32-REMAT-NEXT: addi a0, a0, -795 +; RV32-REMAT-NEXT: li a1, 1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_5372288229: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 160 +; RV64-REMAT-NEXT: addiw a0, a0, 437 +; RV64-REMAT-NEXT: slli a0, a0, 13 +; RV64-REMAT-NEXT: addi a0, a0, -795 +; RV64-REMAT-NEXT: ret ret i64 5372288229 } @@ -1374,6 +1772,21 @@ define i64 @imm_neg_5372288229() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 13 ; RV64IXTHEADBB-NEXT: addi a0, a0, 795 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_5372288229: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 785558 +; RV32-REMAT-NEXT: addi a0, a0, 795 +; RV32-REMAT-NEXT: li a1, -2 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_5372288229: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1048416 +; RV64-REMAT-NEXT: addiw a0, a0, -437 +; RV64-REMAT-NEXT: slli a0, a0, 13 +; RV64-REMAT-NEXT: addi a0, a0, 795 +; RV64-REMAT-NEXT: ret ret i64 -5372288229 } @@ -1422,6 +1835,21 @@ define i64 @imm_8953813715() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 13 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1325 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_8953813715: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 88838 +; RV32-REMAT-NEXT: addi a0, a0, -1325 +; RV32-REMAT-NEXT: li a1, 2 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_8953813715: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 267 +; RV64-REMAT-NEXT: addiw a0, a0, -637 +; RV64-REMAT-NEXT: slli a0, a0, 13 +; RV64-REMAT-NEXT: addi a0, a0, -1325 +; RV64-REMAT-NEXT: ret ret i64 8953813715 } @@ -1470,6 +1898,21 @@ define i64 @imm_neg_8953813715() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 13 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1325 ; 
RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_8953813715: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 959738 +; RV32-REMAT-NEXT: addi a0, a0, 1325 +; RV32-REMAT-NEXT: li a1, -3 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_8953813715: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1048309 +; RV64-REMAT-NEXT: addiw a0, a0, 637 +; RV64-REMAT-NEXT: slli a0, a0, 13 +; RV64-REMAT-NEXT: addi a0, a0, 1325 +; RV64-REMAT-NEXT: ret ret i64 -8953813715 } @@ -1519,6 +1962,21 @@ define i64 @imm_16116864687() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1711 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_16116864687: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 789053 +; RV32-REMAT-NEXT: addi a0, a0, 1711 +; RV32-REMAT-NEXT: li a1, 3 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_16116864687: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 961 +; RV64-REMAT-NEXT: addiw a0, a0, -1475 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1711 +; RV64-REMAT-NEXT: ret ret i64 16116864687 } @@ -1568,6 +2026,21 @@ define i64 @imm_neg_16116864687() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1711 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_16116864687: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 259523 +; RV32-REMAT-NEXT: addi a0, a0, -1711 +; RV32-REMAT-NEXT: li a1, -4 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_16116864687: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1047615 +; RV64-REMAT-NEXT: addiw a0, a0, 1475 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, -1711 +; RV64-REMAT-NEXT: ret ret i64 -16116864687 } @@ -1613,6 +2086,20 @@ define i64 @imm_2344336315() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 2 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1093 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_2344336315: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 572348 +; RV32-REMAT-NEXT: addi 
a0, a0, -1093 +; RV32-REMAT-NEXT: li a1, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_2344336315: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 143087 +; RV64-REMAT-NEXT: slli a0, a0, 2 +; RV64-REMAT-NEXT: addi a0, a0, -1093 +; RV64-REMAT-NEXT: ret ret i64 2344336315 ; 0x8bbbbbbb } @@ -1676,6 +2163,23 @@ define i64 @imm_70370820078523() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 14 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1093 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_70370820078523: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 506812 +; RV32-REMAT-NEXT: addi a0, a0, -1093 +; RV32-REMAT-NEXT: lui a1, 4 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_70370820078523: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 256 +; RV64-REMAT-NEXT: addiw a0, a0, 31 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, -273 +; RV64-REMAT-NEXT: slli a0, a0, 14 +; RV64-REMAT-NEXT: addi a0, a0, -1093 +; RV64-REMAT-NEXT: ret ret i64 70370820078523 ; 0x40007bbbbbbb } @@ -1725,6 +2229,21 @@ define i64 @imm_neg_9223372034778874949() { ; RV64IXTHEADBB-NEXT: slli a1, a0, 63 ; RV64IXTHEADBB-NEXT: add a0, a0, a1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_9223372034778874949: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 506812 +; RV32-REMAT-NEXT: addi a0, a0, -1093 +; RV32-REMAT-NEXT: lui a1, 524288 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_9223372034778874949: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 506812 +; RV64-REMAT-NEXT: addiw a0, a0, -1093 +; RV64-REMAT-NEXT: slli a1, a0, 63 +; RV64-REMAT-NEXT: add a0, a0, a1 +; RV64-REMAT-NEXT: ret ret i64 -9223372034778874949 ; 0x800000007bbbbbbb } @@ -1793,6 +2312,24 @@ define i64 @imm_neg_9223301666034697285() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 14 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1093 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_9223301666034697285: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 506812 +; RV32-REMAT-NEXT: addi a0, 
a0, -1093 +; RV32-REMAT-NEXT: lui a1, 524292 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_9223301666034697285: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 917505 +; RV64-REMAT-NEXT: slli a0, a0, 8 +; RV64-REMAT-NEXT: addi a0, a0, 31 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, -273 +; RV64-REMAT-NEXT: slli a0, a0, 14 +; RV64-REMAT-NEXT: addi a0, a0, -1093 +; RV64-REMAT-NEXT: ret ret i64 -9223301666034697285 ; 0x800040007bbbbbbb } @@ -1838,6 +2375,20 @@ define i64 @imm_neg_2219066437() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 2 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1093 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_2219066437: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 506812 +; RV32-REMAT-NEXT: addi a0, a0, -1093 +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_2219066437: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 913135 +; RV64-REMAT-NEXT: slli a0, a0, 2 +; RV64-REMAT-NEXT: addi a0, a0, -1093 +; RV64-REMAT-NEXT: ret ret i64 -2219066437 ; 0xffffffff7bbbbbbb } @@ -1888,6 +2439,22 @@ define i64 @imm_neg_8798043653189() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 14 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1093 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_8798043653189: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 572348 +; RV32-REMAT-NEXT: addi a0, a0, -1093 +; RV32-REMAT-NEXT: lui a1, 1048575 +; RV32-REMAT-NEXT: addi a1, a1, 2047 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_8798043653189: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 917475 +; RV64-REMAT-NEXT: addiw a0, a0, -273 +; RV64-REMAT-NEXT: slli a0, a0, 14 +; RV64-REMAT-NEXT: addi a0, a0, -1093 +; RV64-REMAT-NEXT: ret ret i64 -8798043653189 ; 0xfffff7ff8bbbbbbb } @@ -1938,6 +2505,22 @@ define i64 @imm_9223372034904144827() { ; RV64IXTHEADBB-NEXT: slli a1, a0, 63 ; RV64IXTHEADBB-NEXT: add a0, a0, a1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_9223372034904144827: +; 
RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 572348 +; RV32-REMAT-NEXT: addi a0, a0, -1093 +; RV32-REMAT-NEXT: lui a1, 524288 +; RV32-REMAT-NEXT: addi a1, a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_9223372034904144827: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 572348 +; RV64-REMAT-NEXT: addiw a0, a0, -1093 +; RV64-REMAT-NEXT: slli a1, a0, 63 +; RV64-REMAT-NEXT: add a0, a0, a1 +; RV64-REMAT-NEXT: ret ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb } @@ -2007,6 +2590,25 @@ define i64 @imm_neg_9223354442718100411() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 14 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1093 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_9223354442718100411: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 572348 +; RV32-REMAT-NEXT: addi a0, a0, -1093 +; RV32-REMAT-NEXT: lui a1, 524287 +; RV32-REMAT-NEXT: addi a1, a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_9223354442718100411: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 524287 +; RV64-REMAT-NEXT: slli a0, a0, 6 +; RV64-REMAT-NEXT: addi a0, a0, -29 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, -273 +; RV64-REMAT-NEXT: slli a0, a0, 14 +; RV64-REMAT-NEXT: addi a0, a0, -1093 +; RV64-REMAT-NEXT: ret ret i64 9223354442718100411 ; 0x7fffefff8bbbbbbb } @@ -2052,6 +2654,20 @@ define i64 @imm_2863311530() { ; RV64IXTHEADBB-NEXT: addiw a0, a0, 1365 ; RV64IXTHEADBB-NEXT: slli a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_2863311530: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 699051 +; RV32-REMAT-NEXT: addi a0, a0, -1366 +; RV32-REMAT-NEXT: li a1, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_2863311530: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 349525 +; RV64-REMAT-NEXT: addiw a0, a0, 1365 +; RV64-REMAT-NEXT: slli a0, a0, 1 +; RV64-REMAT-NEXT: ret ret i64 2863311530 ; #0xaaaaaaaa } @@ -2097,6 +2713,20 @@ define i64 @imm_neg_2863311530() { ; RV64IXTHEADBB-NEXT: addiw a0, a0, -1365 ; 
RV64IXTHEADBB-NEXT: slli a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_2863311530: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 349525 +; RV32-REMAT-NEXT: addi a0, a0, 1366 +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_2863311530: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 699051 +; RV64-REMAT-NEXT: addiw a0, a0, -1365 +; RV64-REMAT-NEXT: slli a0, a0, 1 +; RV64-REMAT-NEXT: ret ret i64 -2863311530 ; #0xffffffff55555556 } @@ -2141,6 +2771,20 @@ define i64 @imm_2147486378() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 31 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1365 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_2147486378: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 524288 +; RV32-REMAT-NEXT: addi a0, a0, 1365 +; RV32-REMAT-NEXT: li a1, 0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_2147486378: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, 1 +; RV64-REMAT-NEXT: slli a0, a0, 31 +; RV64-REMAT-NEXT: addi a0, a0, 1365 +; RV64-REMAT-NEXT: ret ret i64 2147485013 } @@ -2181,6 +2825,19 @@ define i64 @imm_neg_2147485013() { ; RV64IXTHEADBB-NEXT: lui a0, 524288 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1365 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_2147485013: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 524288 +; RV32-REMAT-NEXT: addi a0, a0, -1365 +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_2147485013: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 524288 +; RV64-REMAT-NEXT: addi a0, a0, -1365 +; RV64-REMAT-NEXT: ret ret i64 -2147485013 } @@ -2231,6 +2888,22 @@ define i64 @imm_12900924131259() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 24 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1979 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_12900924131259: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 765952 +; RV32-REMAT-NEXT: addi a0, a0, 1979 +; RV32-REMAT-NEXT: lui a1, 1 +; RV32-REMAT-NEXT: addi a1, a1, -1093 +; RV32-REMAT-NEXT: 
ret +; +; RV64-REMAT-LABEL: imm_12900924131259: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 188 +; RV64-REMAT-NEXT: addiw a0, a0, -1093 +; RV64-REMAT-NEXT: slli a0, a0, 24 +; RV64-REMAT-NEXT: addi a0, a0, 1979 +; RV64-REMAT-NEXT: ret ret i64 12900924131259 } @@ -2274,6 +2947,19 @@ define i64 @imm_50394234880() { ; RV64IXTHEADBB-NEXT: addiw a0, a0, -1093 ; RV64IXTHEADBB-NEXT: slli a0, a0, 16 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_50394234880: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 768944 +; RV32-REMAT-NEXT: li a1, 11 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_50394234880: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 188 +; RV64-REMAT-NEXT: addiw a0, a0, -1093 +; RV64-REMAT-NEXT: slli a0, a0, 16 +; RV64-REMAT-NEXT: ret ret i64 50394234880 } @@ -2329,6 +3015,23 @@ define i64 @imm_12900936431479() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1911 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_12900936431479: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 768955 +; RV32-REMAT-NEXT: addi a0, a0, 1911 +; RV32-REMAT-NEXT: lui a1, 1 +; RV32-REMAT-NEXT: addi a1, a1, -1093 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_12900936431479: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 192239 +; RV64-REMAT-NEXT: slli a0, a0, 2 +; RV64-REMAT-NEXT: addi a0, a0, -1093 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1911 +; RV64-REMAT-NEXT: ret ret i64 12900936431479 } @@ -2384,6 +3087,23 @@ define i64 @imm_12900918536874() { ; RV64IXTHEADBB-NEXT: addi a0, a0, 1365 ; RV64IXTHEADBB-NEXT: slli a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_12900918536874: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 764587 +; RV32-REMAT-NEXT: addi a0, a0, -1366 +; RV32-REMAT-NEXT: lui a1, 1 +; RV32-REMAT-NEXT: addi a1, a1, -1093 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_12900918536874: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 384477 +; 
RV64-REMAT-NEXT: addiw a0, a0, 1365 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1365 +; RV64-REMAT-NEXT: slli a0, a0, 1 +; RV64-REMAT-NEXT: ret ret i64 12900918536874 } @@ -2439,6 +3159,23 @@ define i64 @imm_12900925247761() { ; RV64IXTHEADBB-NEXT: addi a0, a0, -2048 ; RV64IXTHEADBB-NEXT: addi a0, a0, -1775 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_12900925247761: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 766225 +; RV32-REMAT-NEXT: addi a0, a0, 273 +; RV32-REMAT-NEXT: lui a1, 1 +; RV32-REMAT-NEXT: addi a1, a1, -1093 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_12900925247761: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 384478 +; RV64-REMAT-NEXT: addiw a0, a0, -1911 +; RV64-REMAT-NEXT: slli a0, a0, 13 +; RV64-REMAT-NEXT: addi a0, a0, -2048 +; RV64-REMAT-NEXT: addi a0, a0, -1775 +; RV64-REMAT-NEXT: ret ret i64 12900925247761 } @@ -2488,6 +3225,21 @@ define i64 @imm_7158272001() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_7158272001: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 699049 +; RV32-REMAT-NEXT: addi a0, a0, 1 +; RV32-REMAT-NEXT: li a1, 1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_7158272001: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 427 +; RV64-REMAT-NEXT: addiw a0, a0, -1367 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1 +; RV64-REMAT-NEXT: ret ret i64 7158272001 ; 0x0000_0001_aaaa_9001 } @@ -2537,6 +3289,21 @@ define i64 @imm_12884889601() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_12884889601: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 1048573 +; RV32-REMAT-NEXT: addi a0, a0, 1 +; RV32-REMAT-NEXT: li a1, 2 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_12884889601: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 768 +; RV64-REMAT-NEXT: addiw a0, a0, -3 +; 
RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1 +; RV64-REMAT-NEXT: ret ret i64 12884889601 ; 0x0000_0002_ffff_d001 } @@ -2585,6 +3352,21 @@ define i64 @imm_neg_3435982847() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_3435982847: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 209713 +; RV32-REMAT-NEXT: addi a0, a0, 1 +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_3435982847: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1048371 +; RV64-REMAT-NEXT: addiw a0, a0, 817 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1 +; RV64-REMAT-NEXT: ret ret i64 -3435982847 ; 0xffff_ffff_3333_1001 } @@ -2633,6 +3415,21 @@ define i64 @imm_neg_5726842879() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_5726842879: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 698997 +; RV32-REMAT-NEXT: addi a0, a0, 1 +; RV32-REMAT-NEXT: li a1, -2 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_5726842879: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1048235 +; RV64-REMAT-NEXT: addiw a0, a0, -1419 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1 +; RV64-REMAT-NEXT: ret ret i64 -5726842879 ; 0xffff_fffe_aaa7_5001 } @@ -2681,6 +3478,21 @@ define i64 @imm_neg_10307948543() { ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: addi a0, a0, 1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm_neg_10307948543: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 629139 +; RV32-REMAT-NEXT: addi a0, a0, 1 +; RV32-REMAT-NEXT: li a1, -3 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm_neg_10307948543: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1047962 +; RV64-REMAT-NEXT: addiw a0, a0, -1645 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: addi a0, a0, 1 +; RV64-REMAT-NEXT: 
ret ret i64 -10307948543 ; 0xffff_fffd_9999_3001 } @@ -2724,6 +3536,20 @@ define i64 @li_rori_1() { ; RV64IXTHEADBB-NEXT: li a0, -18 ; RV64IXTHEADBB-NEXT: th.srri a0, a0, 21 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: li_rori_1: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a1, 1048567 +; RV32-REMAT-NEXT: addi a1, a1, 2047 +; RV32-REMAT-NEXT: li a0, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: li_rori_1: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -17 +; RV64-REMAT-NEXT: slli a0, a0, 43 +; RV64-REMAT-NEXT: addi a0, a0, -1 +; RV64-REMAT-NEXT: ret ret i64 -149533581377537 } @@ -2767,6 +3593,20 @@ define i64 @li_rori_2() { ; RV64IXTHEADBB-NEXT: li a0, -86 ; RV64IXTHEADBB-NEXT: th.srri a0, a0, 4 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: li_rori_2: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a1, 720896 +; RV32-REMAT-NEXT: addi a1, a1, -1 +; RV32-REMAT-NEXT: li a0, -6 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: li_rori_2: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -5 +; RV64-REMAT-NEXT: slli a0, a0, 60 +; RV64-REMAT-NEXT: addi a0, a0, -6 +; RV64-REMAT-NEXT: ret ret i64 -5764607523034234886 } @@ -2810,6 +3650,20 @@ define i64 @li_rori_3() { ; RV64IXTHEADBB-NEXT: li a0, -18 ; RV64IXTHEADBB-NEXT: th.srri a0, a0, 37 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: li_rori_3: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 491520 +; RV32-REMAT-NEXT: addi a0, a0, -1 +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: li_rori_3: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, -17 +; RV64-REMAT-NEXT: slli a0, a0, 27 +; RV64-REMAT-NEXT: addi a0, a0, -1 +; RV64-REMAT-NEXT: ret ret i64 -2281701377 } @@ -2853,6 +3707,19 @@ define i64 @PR54812() { ; RV64IXTHEADBB-NEXT: addiw a0, a0, 1407 ; RV64IXTHEADBB-NEXT: slli a0, a0, 12 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: PR54812: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 521599 +; RV32-REMAT-NEXT: li a1, -1 +; RV32-REMAT-NEXT: ret +; +; 
RV64-REMAT-LABEL: PR54812: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 1048447 +; RV64-REMAT-NEXT: addiw a0, a0, 1407 +; RV64-REMAT-NEXT: slli a0, a0, 12 +; RV64-REMAT-NEXT: ret ret i64 -2158497792; } @@ -2891,6 +3758,18 @@ define signext i32 @pos_2048() nounwind { ; RV64IXTHEADBB-NEXT: li a0, 1 ; RV64IXTHEADBB-NEXT: slli a0, a0, 11 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: pos_2048: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: li a0, 1 +; RV32-REMAT-NEXT: slli a0, a0, 11 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: pos_2048: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: li a0, 1 +; RV64-REMAT-NEXT: slli a0, a0, 11 +; RV64-REMAT-NEXT: ret ret i32 2048 } @@ -2941,6 +3820,21 @@ define i64 @imm64_same_lo_hi() nounwind { ; RV64IXTHEADBB-NEXT: slli a1, a0, 32 ; RV64IXTHEADBB-NEXT: add a0, a0, a1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_same_lo_hi: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 65793 +; RV32-REMAT-NEXT: addi a0, a0, 16 +; RV32-REMAT-NEXT: mv a1, a0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_same_lo_hi: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 65793 +; RV64-REMAT-NEXT: addiw a0, a0, 16 +; RV64-REMAT-NEXT: slli a1, a0, 32 +; RV64-REMAT-NEXT: add a0, a0, a1 +; RV64-REMAT-NEXT: ret ret i64 1157442765409226768 ; 0x0101010101010101 } @@ -2998,6 +3892,21 @@ define i64 @imm64_same_lo_hi_optsize() nounwind optsize { ; RV64IXTHEADBB-NEXT: slli a1, a0, 32 ; RV64IXTHEADBB-NEXT: add a0, a0, a1 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_same_lo_hi_optsize: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 65793 +; RV32-REMAT-NEXT: addi a0, a0, 16 +; RV32-REMAT-NEXT: mv a1, a0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_same_lo_hi_optsize: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 65793 +; RV64-REMAT-NEXT: addiw a0, a0, 16 +; RV64-REMAT-NEXT: slli a1, a0, 32 +; RV64-REMAT-NEXT: add a0, a0, a1 +; RV64-REMAT-NEXT: ret ret i64 1157442765409226768 ; 0x0101010101010101 } @@ -3067,5 
+3976,23 @@ define i64 @imm64_same_lo_hi_negative() nounwind { ; RV64IXTHEADBB-NEXT: slli a0, a0, 15 ; RV64IXTHEADBB-NEXT: addi a0, a0, 128 ; RV64IXTHEADBB-NEXT: ret +; +; RV32-REMAT-LABEL: imm64_same_lo_hi_negative: +; RV32-REMAT: # %bb.0: +; RV32-REMAT-NEXT: lui a0, 526344 +; RV32-REMAT-NEXT: addi a0, a0, 128 +; RV32-REMAT-NEXT: mv a1, a0 +; RV32-REMAT-NEXT: ret +; +; RV64-REMAT-LABEL: imm64_same_lo_hi_negative: +; RV64-REMAT: # %bb.0: +; RV64-REMAT-NEXT: lui a0, 983297 +; RV64-REMAT-NEXT: slli a0, a0, 4 +; RV64-REMAT-NEXT: addi a0, a0, 257 +; RV64-REMAT-NEXT: slli a0, a0, 16 +; RV64-REMAT-NEXT: addi a0, a0, 257 +; RV64-REMAT-NEXT: slli a0, a0, 15 +; RV64-REMAT-NEXT: addi a0, a0, 128 +; RV64-REMAT-NEXT: ret ret i64 9259542123273814144 ; 0x8080808080808080 } diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll new file mode 100644 index 0000000000000..ef91334c5ff00 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr69586.ll @@ -0,0 +1,1980 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \ +; RUN: -riscv-use-rematerializable-movimm=false | FileCheck %s --check-prefix=NOREMAT +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \ +; RUN: --riscv-use-rematerializable-movimm=true | FileCheck %s --check-prefix=REMAT + +define void @test(ptr %0, ptr %1, i64 %2) { +; NOREMAT-LABEL: test: +; NOREMAT: # %bb.0: +; NOREMAT-NEXT: addi sp, sp, -368 +; NOREMAT-NEXT: .cfi_def_cfa_offset 368 +; NOREMAT-NEXT: sd ra, 360(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s0, 352(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s1, 344(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s2, 336(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s3, 328(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s4, 320(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s5, 312(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s6, 304(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s7, 
296(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s8, 288(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s9, 280(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s10, 272(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s11, 264(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: .cfi_offset ra, -8 +; NOREMAT-NEXT: .cfi_offset s0, -16 +; NOREMAT-NEXT: .cfi_offset s1, -24 +; NOREMAT-NEXT: .cfi_offset s2, -32 +; NOREMAT-NEXT: .cfi_offset s3, -40 +; NOREMAT-NEXT: .cfi_offset s4, -48 +; NOREMAT-NEXT: .cfi_offset s5, -56 +; NOREMAT-NEXT: .cfi_offset s6, -64 +; NOREMAT-NEXT: .cfi_offset s7, -72 +; NOREMAT-NEXT: .cfi_offset s8, -80 +; NOREMAT-NEXT: .cfi_offset s9, -88 +; NOREMAT-NEXT: .cfi_offset s10, -96 +; NOREMAT-NEXT: .cfi_offset s11, -104 +; NOREMAT-NEXT: li a2, 32 +; NOREMAT-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; NOREMAT-NEXT: vle32.v v8, (a0) +; NOREMAT-NEXT: addi a2, a0, 512 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: addi a2, a0, 1024 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v10 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addi a2, a0, 1536 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: li a2, 1 +; NOREMAT-NEXT: slli a2, a2, 11 +; NOREMAT-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: li a4, 5 +; NOREMAT-NEXT: slli a2, a4, 9 +; NOREMAT-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: li a5, 3 +; NOREMAT-NEXT: slli a2, a5, 10 +; NOREMAT-NEXT: sd a2, 240(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: li a3, 7 +; 
NOREMAT-NEXT: slli a2, a3, 9 +; NOREMAT-NEXT: sd a2, 232(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: lui a2, 1 +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: li a2, 9 +; NOREMAT-NEXT: slli a6, a2, 9 +; NOREMAT-NEXT: sd a6, 224(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a6, a0, a6 +; NOREMAT-NEXT: vle32.v v14, (a6) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a6) +; NOREMAT-NEXT: slli a6, a4, 10 +; NOREMAT-NEXT: sd a6, 216(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a6, a0, a6 +; NOREMAT-NEXT: vle32.v v12, (a6) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a6) +; NOREMAT-NEXT: li s8, 11 +; NOREMAT-NEXT: slli a6, s8, 9 +; NOREMAT-NEXT: sd a6, 208(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a6, a0, a6 +; NOREMAT-NEXT: vle32.v v14, (a6) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a6) +; NOREMAT-NEXT: slli a5, a5, 11 +; NOREMAT-NEXT: sd a5, 200(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a5, a0, a5 +; NOREMAT-NEXT: vle32.v v12, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a5) +; NOREMAT-NEXT: li s2, 13 +; NOREMAT-NEXT: slli a5, s2, 9 +; NOREMAT-NEXT: sd a5, 192(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a5, a0, a5 +; NOREMAT-NEXT: vle32.v v14, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a5) +; NOREMAT-NEXT: slli a5, a3, 10 +; NOREMAT-NEXT: sd a5, 184(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a5, a0, a5 +; NOREMAT-NEXT: vle32.v v12, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a5) +; NOREMAT-NEXT: li t0, 15 +; NOREMAT-NEXT: slli a5, t0, 9 +; NOREMAT-NEXT: sd a5, 176(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a5, a0, a5 +; 
NOREMAT-NEXT: vle32.v v14, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a5) +; NOREMAT-NEXT: lui a5, 2 +; NOREMAT-NEXT: add a5, a0, a5 +; NOREMAT-NEXT: vle32.v v12, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a5) +; NOREMAT-NEXT: li a5, 17 +; NOREMAT-NEXT: slli a5, a5, 9 +; NOREMAT-NEXT: sd a5, 168(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: li a7, 17 +; NOREMAT-NEXT: add a5, a0, a5 +; NOREMAT-NEXT: vle32.v v14, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a5) +; NOREMAT-NEXT: slli a5, a2, 10 +; NOREMAT-NEXT: sd a5, 160(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a5, a0, a5 +; NOREMAT-NEXT: vle32.v v12, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a5) +; NOREMAT-NEXT: li a5, 19 +; NOREMAT-NEXT: slli a5, a5, 9 +; NOREMAT-NEXT: sd a5, 152(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: li a6, 19 +; NOREMAT-NEXT: add a5, a0, a5 +; NOREMAT-NEXT: vle32.v v14, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a5) +; NOREMAT-NEXT: slli a4, a4, 11 +; NOREMAT-NEXT: sd a4, 144(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a4, a0, a4 +; NOREMAT-NEXT: vle32.v v12, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a4) +; NOREMAT-NEXT: li s10, 21 +; NOREMAT-NEXT: slli a4, s10, 9 +; NOREMAT-NEXT: sd a4, 136(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a4, a0, a4 +; NOREMAT-NEXT: vle32.v v14, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a4) +; NOREMAT-NEXT: slli a4, s8, 10 +; NOREMAT-NEXT: sd a4, 128(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a4, a0, a4 +; NOREMAT-NEXT: vle32.v v12, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a4) +; NOREMAT-NEXT: li s6, 23 +; NOREMAT-NEXT: slli a4, s6, 9 +; NOREMAT-NEXT: sd a4, 120(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a4, a0, a4 +; NOREMAT-NEXT: vle32.v v14, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, 
v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a4) +; NOREMAT-NEXT: lui a4, 3 +; NOREMAT-NEXT: add a4, a0, a4 +; NOREMAT-NEXT: vle32.v v12, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a4) +; NOREMAT-NEXT: li s3, 25 +; NOREMAT-NEXT: slli a4, s3, 9 +; NOREMAT-NEXT: sd a4, 112(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a4, a0, a4 +; NOREMAT-NEXT: vle32.v v14, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a4) +; NOREMAT-NEXT: slli a4, s2, 10 +; NOREMAT-NEXT: sd a4, 104(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a4, a0, a4 +; NOREMAT-NEXT: vle32.v v12, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a4) +; NOREMAT-NEXT: li t5, 27 +; NOREMAT-NEXT: slli a4, t5, 9 +; NOREMAT-NEXT: sd a4, 96(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a4, a0, a4 +; NOREMAT-NEXT: vle32.v v14, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a4) +; NOREMAT-NEXT: slli a3, a3, 11 +; NOREMAT-NEXT: sd a3, 88(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a3, a0, a3 +; NOREMAT-NEXT: vle32.v v12, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a3) +; NOREMAT-NEXT: li t2, 29 +; NOREMAT-NEXT: slli a3, t2, 9 +; NOREMAT-NEXT: sd a3, 80(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a3, a0, a3 +; NOREMAT-NEXT: vle32.v v14, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a3) +; NOREMAT-NEXT: slli a3, t0, 10 +; NOREMAT-NEXT: sd a3, 72(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a3, a0, a3 +; NOREMAT-NEXT: vle32.v v12, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a3) +; NOREMAT-NEXT: li a5, 31 +; NOREMAT-NEXT: slli a3, a5, 9 +; NOREMAT-NEXT: sd a3, 64(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a3, a0, a3 +; NOREMAT-NEXT: vle32.v v14, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a3) +; NOREMAT-NEXT: lui a4, 4 +; NOREMAT-NEXT: add a3, a0, a4 +; NOREMAT-NEXT: vle32.v 
v12, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a3) +; NOREMAT-NEXT: addiw a3, a4, 512 +; NOREMAT-NEXT: sd a3, 56(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a3, a0, a3 +; NOREMAT-NEXT: vle32.v v14, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a3) +; NOREMAT-NEXT: slli a3, a7, 10 +; NOREMAT-NEXT: sd a3, 48(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a3, a0, a3 +; NOREMAT-NEXT: vle32.v v12, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a3) +; NOREMAT-NEXT: addiw a3, a4, 1536 +; NOREMAT-NEXT: sd a3, 40(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a3, a0, a3 +; NOREMAT-NEXT: vle32.v v14, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a3) +; NOREMAT-NEXT: slli a2, a2, 11 +; NOREMAT-NEXT: sd a2, 32(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: lui s1, 5 +; NOREMAT-NEXT: addiw a2, s1, -1536 +; NOREMAT-NEXT: sd a2, 24(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli a2, a6, 10 +; NOREMAT-NEXT: sd a2, 16(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw a2, s1, -512 +; NOREMAT-NEXT: sd a2, 8(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a0, a2 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: add a2, a0, s1 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw ra, s1, 512 +; NOREMAT-NEXT: add a2, a0, ra +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, 
v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli s11, s10, 10 +; NOREMAT-NEXT: add a2, a0, s11 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw s10, s1, 1536 +; NOREMAT-NEXT: add a2, a0, s10 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli s9, s8, 11 +; NOREMAT-NEXT: add a2, a0, s9 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: lui t1, 6 +; NOREMAT-NEXT: addiw s8, t1, -1536 +; NOREMAT-NEXT: add a2, a0, s8 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli s7, s6, 10 +; NOREMAT-NEXT: add a2, a0, s7 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw s6, t1, -512 +; NOREMAT-NEXT: add a2, a0, s6 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: add a2, a0, t1 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw s5, t1, 512 +; NOREMAT-NEXT: add a2, a0, s5 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli s4, s3, 10 +; NOREMAT-NEXT: add a2, a0, s4 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw s3, t1, 1536 +; NOREMAT-NEXT: add a2, a0, s3 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli s2, s2, 11 +; NOREMAT-NEXT: add a2, a0, s2 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: lui 
a3, 7 +; NOREMAT-NEXT: addiw s0, a3, -1536 +; NOREMAT-NEXT: add a2, a0, s0 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli t6, t5, 10 +; NOREMAT-NEXT: add a2, a0, t6 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw t5, a3, -512 +; NOREMAT-NEXT: add a2, a0, t5 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: add a2, a0, a3 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw t4, a3, 512 +; NOREMAT-NEXT: add a2, a0, t4 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli t3, t2, 10 +; NOREMAT-NEXT: add a2, a0, t3 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: addiw t2, a3, 1536 +; NOREMAT-NEXT: add a2, a0, t2 +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a2) +; NOREMAT-NEXT: slli t0, t0, 11 +; NOREMAT-NEXT: add a2, a0, t0 +; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a2) +; NOREMAT-NEXT: lui a2, 8 +; NOREMAT-NEXT: addiw a7, a2, -1536 +; NOREMAT-NEXT: add a4, a0, a7 +; NOREMAT-NEXT: vle32.v v14, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a4) +; NOREMAT-NEXT: slli a6, a5, 10 +; NOREMAT-NEXT: add a4, a0, a6 +; NOREMAT-NEXT: vle32.v v12, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: vle32.v v8, (a4) +; NOREMAT-NEXT: addiw a5, a2, -512 +; NOREMAT-NEXT: add a4, a0, a5 +; NOREMAT-NEXT: vle32.v v14, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: vle32.v v10, (a4) +; NOREMAT-NEXT: add a0, a0, a2 +; NOREMAT-NEXT: 
vle32.v v12, (a0) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: addi a0, a1, 1024 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: addi a0, a1, 1536 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: lui a0, 1 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; 
NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: lui a0, 2 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 128(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 120(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: lui a0, 3 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld 
a0, 96(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 80(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 72(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 64(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: lui a0, 4 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 56(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 40(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 32(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; 
NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: add s1, a1, s1 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (s1) +; NOREMAT-NEXT: add ra, a1, ra +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (ra) +; NOREMAT-NEXT: add s11, a1, s11 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (s11) +; NOREMAT-NEXT: add s10, a1, s10 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (s10) +; NOREMAT-NEXT: add s9, a1, s9 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (s9) +; NOREMAT-NEXT: add s8, a1, s8 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (s8) +; NOREMAT-NEXT: add s7, a1, s7 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (s7) +; NOREMAT-NEXT: add s6, a1, s6 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (s6) +; NOREMAT-NEXT: add t1, a1, t1 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (t1) +; NOREMAT-NEXT: add s5, a1, s5 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (s5) +; NOREMAT-NEXT: add s4, a1, s4 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (s4) +; NOREMAT-NEXT: add s3, a1, s3 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (s3) +; NOREMAT-NEXT: add s2, a1, s2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (s2) +; NOREMAT-NEXT: add s0, a1, s0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (s0) +; NOREMAT-NEXT: add t6, a1, t6 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (t6) +; NOREMAT-NEXT: add t5, a1, t5 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (t5) +; NOREMAT-NEXT: add a3, a1, a3 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a3) +; NOREMAT-NEXT: add t4, a1, t4 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (t4) +; NOREMAT-NEXT: add t3, a1, t3 +; NOREMAT-NEXT: 
sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (t3) +; NOREMAT-NEXT: add t2, a1, t2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (t2) +; NOREMAT-NEXT: add t0, a1, t0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (t0) +; NOREMAT-NEXT: add a7, a1, a7 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a7) +; NOREMAT-NEXT: add a6, a1, a6 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a6) +; NOREMAT-NEXT: add a5, a1, a5 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a5) +; NOREMAT-NEXT: add a0, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: addiw a0, a2, 512 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: addiw a0, a2, 1024 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: addiw a0, a2, 1536 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: li a0, 17 +; NOREMAT-NEXT: slli a0, a0, 11 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: lui a0, 9 +; NOREMAT-NEXT: addiw a2, a0, -1536 +; NOREMAT-NEXT: add a2, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a2) +; NOREMAT-NEXT: addiw a2, a0, -1024 +; NOREMAT-NEXT: add a2, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a2) +; NOREMAT-NEXT: addiw a2, a0, -512 +; NOREMAT-NEXT: add a2, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a2) +; NOREMAT-NEXT: add a2, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a2) +; NOREMAT-NEXT: addiw a2, a0, 512 +; NOREMAT-NEXT: add a2, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a2) +; 
NOREMAT-NEXT: addiw a2, a0, 1024 +; NOREMAT-NEXT: add a2, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a2) +; NOREMAT-NEXT: addiw a0, a0, 1536 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: li a0, 19 +; NOREMAT-NEXT: slli a0, a0, 11 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: lui a0, 10 +; NOREMAT-NEXT: addiw a2, a0, -1536 +; NOREMAT-NEXT: add a2, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a2) +; NOREMAT-NEXT: addiw a2, a0, -1024 +; NOREMAT-NEXT: add a2, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a2) +; NOREMAT-NEXT: addiw a2, a0, -512 +; NOREMAT-NEXT: add a2, a1, a2 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a2) +; NOREMAT-NEXT: add a2, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; NOREMAT-NEXT: vse32.v v8, (a2) +; NOREMAT-NEXT: addiw a0, a0, 512 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: vse32.v v10, (a0) +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: ld ra, 360(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s0, 352(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s1, 344(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s2, 336(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s3, 328(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s5, 312(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s6, 304(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s7, 296(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s8, 288(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s9, 280(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s10, 272(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s11, 264(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: addi sp, sp, 368 +; NOREMAT-NEXT: ret +; +; REMAT-LABEL: test: +; REMAT: # 
%bb.0: +; REMAT-NEXT: addi sp, sp, -112 +; REMAT-NEXT: .cfi_def_cfa_offset 112 +; REMAT-NEXT: sd ra, 104(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s0, 96(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s1, 88(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s2, 80(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s3, 72(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s4, 64(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s5, 56(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s6, 48(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s7, 40(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s8, 32(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s9, 24(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s10, 16(sp) # 8-byte Folded Spill +; REMAT-NEXT: sd s11, 8(sp) # 8-byte Folded Spill +; REMAT-NEXT: .cfi_offset ra, -8 +; REMAT-NEXT: .cfi_offset s0, -16 +; REMAT-NEXT: .cfi_offset s1, -24 +; REMAT-NEXT: .cfi_offset s2, -32 +; REMAT-NEXT: .cfi_offset s3, -40 +; REMAT-NEXT: .cfi_offset s4, -48 +; REMAT-NEXT: .cfi_offset s5, -56 +; REMAT-NEXT: .cfi_offset s6, -64 +; REMAT-NEXT: .cfi_offset s7, -72 +; REMAT-NEXT: .cfi_offset s8, -80 +; REMAT-NEXT: .cfi_offset s9, -88 +; REMAT-NEXT: .cfi_offset s10, -96 +; REMAT-NEXT: .cfi_offset s11, -104 +; REMAT-NEXT: li a2, 32 +; REMAT-NEXT: vsetvli zero, a2, e32, m2, ta, ma +; REMAT-NEXT: vle32.v v8, (a0) +; REMAT-NEXT: addi a2, a0, 512 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: addi a2, a0, 1024 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v10 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: addi a2, a0, 1536 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 1 +; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 5 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; 
REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 3 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 7 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: lui a2, 1 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 9 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 5 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 11 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 3 +; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 13 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 7 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 15 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: lui a2, 2 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 
3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 17 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 9 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 19 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 5 +; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 21 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 11 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 23 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: lui a2, 3 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 25 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 13 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 27 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; 
REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 7 +; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 29 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 15 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: li a2, 31 +; REMAT-NEXT: slli a2, a2, 9 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: addiw a2, a2, 512 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 17 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui a2, 4 +; REMAT-NEXT: addiw a2, a2, 1536 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 9 +; REMAT-NEXT: slli a2, a2, 11 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui a2, 5 +; REMAT-NEXT: addiw a2, a2, -1536 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a2, 
19 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui ra, 5 +; REMAT-NEXT: addiw ra, ra, -512 +; REMAT-NEXT: add a2, a0, ra +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: lui s11, 5 +; REMAT-NEXT: add a2, a0, s11 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui s10, 5 +; REMAT-NEXT: addiw s10, s10, 512 +; REMAT-NEXT: add a2, a0, s10 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li s9, 21 +; REMAT-NEXT: slli s9, s9, 10 +; REMAT-NEXT: add a2, a0, s9 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui s8, 5 +; REMAT-NEXT: addiw s8, s8, 1536 +; REMAT-NEXT: add a2, a0, s8 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li s7, 11 +; REMAT-NEXT: slli s7, s7, 11 +; REMAT-NEXT: add a2, a0, s7 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui s6, 6 +; REMAT-NEXT: addiw s6, s6, -1536 +; REMAT-NEXT: add a2, a0, s6 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li s5, 23 +; REMAT-NEXT: slli s5, s5, 10 +; REMAT-NEXT: add a2, a0, s5 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui s4, 6 +; REMAT-NEXT: addiw s4, s4, -512 +; REMAT-NEXT: add a2, a0, s4 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: lui s3, 6 +; REMAT-NEXT: add a2, a0, s3 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; 
REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui s2, 6 +; REMAT-NEXT: addiw s2, s2, 512 +; REMAT-NEXT: add a2, a0, s2 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li s1, 25 +; REMAT-NEXT: slli s1, s1, 10 +; REMAT-NEXT: add a2, a0, s1 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui s0, 6 +; REMAT-NEXT: addiw s0, s0, 1536 +; REMAT-NEXT: add a2, a0, s0 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li t6, 13 +; REMAT-NEXT: slli t6, t6, 11 +; REMAT-NEXT: add a2, a0, t6 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui t5, 7 +; REMAT-NEXT: addiw t5, t5, -1536 +; REMAT-NEXT: add a2, a0, t5 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li t4, 27 +; REMAT-NEXT: slli t4, t4, 10 +; REMAT-NEXT: add a2, a0, t4 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui t3, 7 +; REMAT-NEXT: addiw t3, t3, -512 +; REMAT-NEXT: add a2, a0, t3 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: lui t2, 7 +; REMAT-NEXT: add a2, a0, t2 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui t1, 7 +; REMAT-NEXT: addiw t1, t1, 512 +; REMAT-NEXT: add a2, a0, t1 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li t0, 29 +; REMAT-NEXT: slli t0, t0, 10 +; REMAT-NEXT: add a2, a0, t0 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui a7, 7 +; REMAT-NEXT: addiw a7, a7, 1536 +; REMAT-NEXT: add a2, a0, 
a7 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a6, 15 +; REMAT-NEXT: slli a6, a6, 11 +; REMAT-NEXT: add a2, a0, a6 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui a5, 8 +; REMAT-NEXT: addiw a5, a5, -1536 +; REMAT-NEXT: add a2, a0, a5 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: li a4, 31 +; REMAT-NEXT: slli a4, a4, 10 +; REMAT-NEXT: add a2, a0, a4 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: vle32.v v8, (a2) +; REMAT-NEXT: lui a3, 8 +; REMAT-NEXT: addiw a3, a3, -512 +; REMAT-NEXT: add a2, a0, a3 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: vle32.v v10, (a2) +; REMAT-NEXT: lui a2, 8 +; REMAT-NEXT: add a0, a0, a2 +; REMAT-NEXT: vle32.v v12, (a0) +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: addi a0, a1, 1024 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: addi a0, a1, 1536 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 1 +; REMAT-NEXT: slli a0, a0, 11 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 5 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 3 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 7 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 1 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; 
REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 9 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 5 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 11 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 3 +; REMAT-NEXT: slli a0, a0, 11 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 13 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 7 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 15 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 2 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 17 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 9 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 19 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 5 +; REMAT-NEXT: slli a0, a0, 11 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 21 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: 
vse32.v v10, (a0) +; REMAT-NEXT: li a0, 11 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 23 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 3 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 25 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 13 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 27 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 7 +; REMAT-NEXT: slli a0, a0, 11 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 29 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 15 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: li a0, 31 +; REMAT-NEXT: slli a0, a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 4 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 4 +; REMAT-NEXT: addiw a0, a0, 512 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 17 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui 
a0, 4 +; REMAT-NEXT: addiw a0, a0, 1536 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 9 +; REMAT-NEXT: slli a0, a0, 11 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 5 +; REMAT-NEXT: addiw a0, a0, -1536 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 19 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: add ra, a1, ra +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (ra) +; REMAT-NEXT: add s11, a1, s11 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (s11) +; REMAT-NEXT: add s10, a1, s10 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (s10) +; REMAT-NEXT: add s9, a1, s9 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (s9) +; REMAT-NEXT: add s8, a1, s8 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (s8) +; REMAT-NEXT: add s7, a1, s7 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (s7) +; REMAT-NEXT: add s6, a1, s6 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (s6) +; REMAT-NEXT: add s5, a1, s5 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (s5) +; REMAT-NEXT: add s4, a1, s4 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (s4) +; REMAT-NEXT: add s3, a1, s3 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (s3) +; REMAT-NEXT: add s2, a1, s2 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (s2) +; REMAT-NEXT: add s1, a1, s1 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (s1) +; REMAT-NEXT: add s0, a1, s0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (s0) +; REMAT-NEXT: add t6, a1, t6 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: 
vse32.v v8, (t6) +; REMAT-NEXT: add t5, a1, t5 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (t5) +; REMAT-NEXT: add t4, a1, t4 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (t4) +; REMAT-NEXT: add t3, a1, t3 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (t3) +; REMAT-NEXT: add t2, a1, t2 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (t2) +; REMAT-NEXT: add t1, a1, t1 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (t1) +; REMAT-NEXT: add t0, a1, t0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (t0) +; REMAT-NEXT: add a7, a1, a7 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a7) +; REMAT-NEXT: add a6, a1, a6 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a6) +; REMAT-NEXT: add a5, a1, a5 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a5) +; REMAT-NEXT: add a4, a1, a4 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a4) +; REMAT-NEXT: add a3, a1, a3 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a3) +; REMAT-NEXT: add a2, a1, a2 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a2) +; REMAT-NEXT: lui a0, 8 +; REMAT-NEXT: addiw a0, a0, 512 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 8 +; REMAT-NEXT: addiw a0, a0, 1024 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 8 +; REMAT-NEXT: addiw a0, a0, 1536 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 17 +; REMAT-NEXT: slli a0, a0, 11 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 9 +; REMAT-NEXT: addiw a0, a0, -1536 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v 
v10, (a0) +; REMAT-NEXT: lui a0, 9 +; REMAT-NEXT: addiw a0, a0, -1024 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 9 +; REMAT-NEXT: addiw a0, a0, -512 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 9 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 9 +; REMAT-NEXT: addiw a0, a0, 512 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 9 +; REMAT-NEXT: addiw a0, a0, 1024 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 9 +; REMAT-NEXT: addiw a0, a0, 1536 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: li a0, 19 +; REMAT-NEXT: slli a0, a0, 11 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 10 +; REMAT-NEXT: addiw a0, a0, -1536 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 10 +; REMAT-NEXT: addiw a0, a0, -1024 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 10 +; REMAT-NEXT: addiw a0, a0, -512 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: lui a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0 +; REMAT-NEXT: vse32.v v8, (a0) +; REMAT-NEXT: lui a0, 10 +; REMAT-NEXT: addiw a0, a0, 512 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: vse32.v v10, (a0) +; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; REMAT-NEXT: ld ra, 104(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s0, 96(sp) # 8-byte Folded Reload +; 
REMAT-NEXT: ld s1, 88(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s2, 80(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s3, 72(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s4, 64(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s5, 56(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s6, 48(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s7, 40(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s8, 32(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s9, 24(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s10, 16(sp) # 8-byte Folded Reload +; REMAT-NEXT: ld s11, 8(sp) # 8-byte Folded Reload +; REMAT-NEXT: addi sp, sp, 112 +; REMAT-NEXT: ret + %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 32, i64 2, i64 1) + %5 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %0, i64 %4) + %6 = getelementptr inbounds i32, ptr %0, i64 128 + %7 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %6, i64 %4) + %8 = getelementptr inbounds i32, ptr %0, i64 256 + %9 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %8, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %5, %7, i64 %4) + %10 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %8, i64 %4) + %11 = getelementptr inbounds i32, ptr %0, i64 384 + %12 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %11, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %7, %9, i64 %4) + %13 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %11, i64 %4) + %14 = getelementptr inbounds i32, ptr %0, i64 512 + %15 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %14, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %10, %12, i64 %4) + %16 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %14, i64 %4) + %17 = getelementptr inbounds i32, ptr %0, i64 640 + %18 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %17, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %13, %15, i64 %4) + %19 = tail call 
@llvm.riscv.vle.nxv4i32.i64( poison, ptr %17, i64 %4) + %20 = getelementptr inbounds i32, ptr %0, i64 768 + %21 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %20, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %16, %18, i64 %4) + %22 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %20, i64 %4) + %23 = getelementptr inbounds i32, ptr %0, i64 896 + %24 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %23, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %19, %21, i64 %4) + %25 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %23, i64 %4) + %26 = getelementptr inbounds i32, ptr %0, i64 1024 + %27 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %26, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %22, %24, i64 %4) + %28 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %26, i64 %4) + %29 = getelementptr inbounds i32, ptr %0, i64 1152 + %30 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %29, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %25, %27, i64 %4) + %31 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %29, i64 %4) + %32 = getelementptr inbounds i32, ptr %0, i64 1280 + %33 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %32, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %28, %30, i64 %4) + %34 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %32, i64 %4) + %35 = getelementptr inbounds i32, ptr %0, i64 1408 + %36 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %35, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %31, %33, i64 %4) + %37 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %35, i64 %4) + %38 = getelementptr inbounds i32, ptr %0, i64 1536 + %39 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %38, i64 %4) + tail call void 
@llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %34, %36, i64 %4) + %40 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %38, i64 %4) + %41 = getelementptr inbounds i32, ptr %0, i64 1664 + %42 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %41, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %37, %39, i64 %4) + %43 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %41, i64 %4) + %44 = getelementptr inbounds i32, ptr %0, i64 1792 + %45 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %44, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %40, %42, i64 %4) + %46 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %44, i64 %4) + %47 = getelementptr inbounds i32, ptr %0, i64 1920 + %48 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %47, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %43, %45, i64 %4) + %49 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %47, i64 %4) + %50 = getelementptr inbounds i32, ptr %0, i64 2048 + %51 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %50, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %46, %48, i64 %4) + %52 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %50, i64 %4) + %53 = getelementptr inbounds i32, ptr %0, i64 2176 + %54 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %53, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %49, %51, i64 %4) + %55 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %53, i64 %4) + %56 = getelementptr inbounds i32, ptr %0, i64 2304 + %57 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %56, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %52, %54, i64 %4) + %58 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %56, i64 %4) + %59 = getelementptr inbounds i32, ptr %0, i64 2432 + %60 = tail call 
@llvm.riscv.vle.nxv4i32.i64( poison, ptr %59, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %55, %57, i64 %4) + %61 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %59, i64 %4) + %62 = getelementptr inbounds i32, ptr %0, i64 2560 + %63 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %62, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %58, %60, i64 %4) + %64 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %62, i64 %4) + %65 = getelementptr inbounds i32, ptr %0, i64 2688 + %66 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %65, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %61, %63, i64 %4) + %67 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %65, i64 %4) + %68 = getelementptr inbounds i32, ptr %0, i64 2816 + %69 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %68, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %64, %66, i64 %4) + %70 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %68, i64 %4) + %71 = getelementptr inbounds i32, ptr %0, i64 2944 + %72 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %71, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %67, %69, i64 %4) + %73 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %71, i64 %4) + %74 = getelementptr inbounds i32, ptr %0, i64 3072 + %75 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %74, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %70, %72, i64 %4) + %76 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %74, i64 %4) + %77 = getelementptr inbounds i32, ptr %0, i64 3200 + %78 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %77, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %73, %75, i64 %4) + %79 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %77, i64 %4) + %80 = 
getelementptr inbounds i32, ptr %0, i64 3328 + %81 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %80, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %76, %78, i64 %4) + %82 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %80, i64 %4) + %83 = getelementptr inbounds i32, ptr %0, i64 3456 + %84 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %83, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %79, %81, i64 %4) + %85 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %83, i64 %4) + %86 = getelementptr inbounds i32, ptr %0, i64 3584 + %87 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %86, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %82, %84, i64 %4) + %88 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %86, i64 %4) + %89 = getelementptr inbounds i32, ptr %0, i64 3712 + %90 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %89, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %85, %87, i64 %4) + %91 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %89, i64 %4) + %92 = getelementptr inbounds i32, ptr %0, i64 3840 + %93 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %92, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %88, %90, i64 %4) + %94 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %92, i64 %4) + %95 = getelementptr inbounds i32, ptr %0, i64 3968 + %96 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %95, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %91, %93, i64 %4) + %97 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %95, i64 %4) + %98 = getelementptr inbounds i32, ptr %0, i64 4096 + %99 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %98, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %94, %96, i64 %4) + %100 = tail call 
@llvm.riscv.vle.nxv4i32.i64( poison, ptr %98, i64 %4) + %101 = getelementptr inbounds i32, ptr %0, i64 4224 + %102 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %101, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %97, %99, i64 %4) + %103 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %101, i64 %4) + %104 = getelementptr inbounds i32, ptr %0, i64 4352 + %105 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %104, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %100, %102, i64 %4) + %106 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %104, i64 %4) + %107 = getelementptr inbounds i32, ptr %0, i64 4480 + %108 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %107, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %103, %105, i64 %4) + %109 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %107, i64 %4) + %110 = getelementptr inbounds i32, ptr %0, i64 4608 + %111 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %110, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %106, %108, i64 %4) + %112 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %110, i64 %4) + %113 = getelementptr inbounds i32, ptr %0, i64 4736 + %114 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %113, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %109, %111, i64 %4) + %115 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %113, i64 %4) + %116 = getelementptr inbounds i32, ptr %0, i64 4864 + %117 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %116, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %112, %114, i64 %4) + %118 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %116, i64 %4) + %119 = getelementptr inbounds i32, ptr %0, i64 4992 + %120 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %119, i64 %4) + tail call 
void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %115, %117, i64 %4) + %121 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %119, i64 %4) + %122 = getelementptr inbounds i32, ptr %0, i64 5120 + %123 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %122, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %118, %120, i64 %4) + %124 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %122, i64 %4) + %125 = getelementptr inbounds i32, ptr %0, i64 5248 + %126 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %125, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %121, %123, i64 %4) + %127 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %125, i64 %4) + %128 = getelementptr inbounds i32, ptr %0, i64 5376 + %129 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %128, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %124, %126, i64 %4) + %130 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %128, i64 %4) + %131 = getelementptr inbounds i32, ptr %0, i64 5504 + %132 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %131, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %127, %129, i64 %4) + %133 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %131, i64 %4) + %134 = getelementptr inbounds i32, ptr %0, i64 5632 + %135 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %134, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %130, %132, i64 %4) + %136 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %134, i64 %4) + %137 = getelementptr inbounds i32, ptr %0, i64 5760 + %138 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %137, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %133, %135, i64 %4) + %139 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %137, i64 %4) + %140 = getelementptr inbounds 
i32, ptr %0, i64 5888 + %141 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %140, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %136, %138, i64 %4) + %142 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %140, i64 %4) + %143 = getelementptr inbounds i32, ptr %0, i64 6016 + %144 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %143, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %139, %141, i64 %4) + %145 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %143, i64 %4) + %146 = getelementptr inbounds i32, ptr %0, i64 6144 + %147 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %146, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %142, %144, i64 %4) + %148 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %146, i64 %4) + %149 = getelementptr inbounds i32, ptr %0, i64 6272 + %150 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %149, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %145, %147, i64 %4) + %151 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %149, i64 %4) + %152 = getelementptr inbounds i32, ptr %0, i64 6400 + %153 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %152, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %148, %150, i64 %4) + %154 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %152, i64 %4) + %155 = getelementptr inbounds i32, ptr %0, i64 6528 + %156 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %155, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %151, %153, i64 %4) + %157 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %155, i64 %4) + %158 = getelementptr inbounds i32, ptr %0, i64 6656 + %159 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %158, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %154, %156, i64 
%4) + %160 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %158, i64 %4) + %161 = getelementptr inbounds i32, ptr %0, i64 6784 + %162 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %161, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %157, %159, i64 %4) + %163 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %161, i64 %4) + %164 = getelementptr inbounds i32, ptr %0, i64 6912 + %165 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %164, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %160, %162, i64 %4) + %166 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %164, i64 %4) + %167 = getelementptr inbounds i32, ptr %0, i64 7040 + %168 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %167, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %163, %165, i64 %4) + %169 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %167, i64 %4) + %170 = getelementptr inbounds i32, ptr %0, i64 7168 + %171 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %170, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %166, %168, i64 %4) + %172 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %170, i64 %4) + %173 = getelementptr inbounds i32, ptr %0, i64 7296 + %174 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %173, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %169, %171, i64 %4) + %175 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %173, i64 %4) + %176 = getelementptr inbounds i32, ptr %0, i64 7424 + %177 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %176, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %172, %174, i64 %4) + %178 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %176, i64 %4) + %179 = getelementptr inbounds i32, ptr %0, i64 7552 + %180 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr 
%179, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %175, %177, i64 %4) + %181 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %179, i64 %4) + %182 = getelementptr inbounds i32, ptr %0, i64 7680 + %183 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %182, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %178, %180, i64 %4) + %184 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %182, i64 %4) + %185 = getelementptr inbounds i32, ptr %0, i64 7808 + %186 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %185, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %181, %183, i64 %4) + %187 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %185, i64 %4) + %188 = getelementptr inbounds i32, ptr %0, i64 7936 + %189 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %188, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %184, %186, i64 %4) + %190 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %188, i64 %4) + %191 = getelementptr inbounds i32, ptr %0, i64 8064 + %192 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %191, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %187, %189, i64 %4) + %193 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %191, i64 %4) + %194 = getelementptr inbounds i32, ptr %0, i64 8192 + %195 = tail call @llvm.riscv.vle.nxv4i32.i64( poison, ptr %194, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %190, %192, i64 %4) + tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, %193, %195, i64 %4) + %196 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + %197 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + %198 = getelementptr inbounds i32, ptr %1, i64 256 + tail call void 
@llvm.riscv.vse.nxv4i32.i64( %196, ptr %198, i64 %4) + %199 = getelementptr inbounds i32, ptr %1, i64 384 + %200 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %197, ptr %199, i64 %4) + %201 = getelementptr inbounds i32, ptr %1, i64 512 + %202 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %200, ptr %201, i64 %4) + %203 = getelementptr inbounds i32, ptr %1, i64 640 + %204 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %202, ptr %203, i64 %4) + %205 = getelementptr inbounds i32, ptr %1, i64 768 + %206 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %204, ptr %205, i64 %4) + %207 = getelementptr inbounds i32, ptr %1, i64 896 + %208 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %206, ptr %207, i64 %4) + %209 = getelementptr inbounds i32, ptr %1, i64 1024 + %210 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %208, ptr %209, i64 %4) + %211 = getelementptr inbounds i32, ptr %1, i64 1152 + %212 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %210, ptr %211, i64 %4) + %213 = getelementptr inbounds i32, ptr %1, i64 1280 + %214 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %212, ptr %213, i64 %4) + %215 = getelementptr inbounds i32, ptr %1, i64 1408 + %216 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %214, ptr %215, i64 
%4) + %217 = getelementptr inbounds i32, ptr %1, i64 1536 + %218 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %216, ptr %217, i64 %4) + %219 = getelementptr inbounds i32, ptr %1, i64 1664 + %220 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %218, ptr %219, i64 %4) + %221 = getelementptr inbounds i32, ptr %1, i64 1792 + %222 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %220, ptr %221, i64 %4) + %223 = getelementptr inbounds i32, ptr %1, i64 1920 + %224 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %222, ptr %223, i64 %4) + %225 = getelementptr inbounds i32, ptr %1, i64 2048 + %226 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %224, ptr %225, i64 %4) + %227 = getelementptr inbounds i32, ptr %1, i64 2176 + %228 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %226, ptr %227, i64 %4) + %229 = getelementptr inbounds i32, ptr %1, i64 2304 + %230 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %228, ptr %229, i64 %4) + %231 = getelementptr inbounds i32, ptr %1, i64 2432 + %232 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %230, ptr %231, i64 %4) + %233 = getelementptr inbounds i32, ptr %1, i64 2560 + %234 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %232, ptr %233, i64 %4) + %235 = getelementptr inbounds i32, ptr 
%1, i64 2688 + %236 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %234, ptr %235, i64 %4) + %237 = getelementptr inbounds i32, ptr %1, i64 2816 + %238 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %236, ptr %237, i64 %4) + %239 = getelementptr inbounds i32, ptr %1, i64 2944 + %240 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %238, ptr %239, i64 %4) + %241 = getelementptr inbounds i32, ptr %1, i64 3072 + %242 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %240, ptr %241, i64 %4) + %243 = getelementptr inbounds i32, ptr %1, i64 3200 + %244 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %242, ptr %243, i64 %4) + %245 = getelementptr inbounds i32, ptr %1, i64 3328 + %246 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %244, ptr %245, i64 %4) + %247 = getelementptr inbounds i32, ptr %1, i64 3456 + %248 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %246, ptr %247, i64 %4) + %249 = getelementptr inbounds i32, ptr %1, i64 3584 + %250 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %248, ptr %249, i64 %4) + %251 = getelementptr inbounds i32, ptr %1, i64 3712 + %252 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %250, ptr %251, i64 %4) + %253 = getelementptr inbounds i32, ptr %1, i64 3840 + %254 = tail call 
@llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %252, ptr %253, i64 %4) + %255 = getelementptr inbounds i32, ptr %1, i64 3968 + %256 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %254, ptr %255, i64 %4) + %257 = getelementptr inbounds i32, ptr %1, i64 4096 + %258 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %256, ptr %257, i64 %4) + %259 = getelementptr inbounds i32, ptr %1, i64 4224 + %260 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %258, ptr %259, i64 %4) + %261 = getelementptr inbounds i32, ptr %1, i64 4352 + %262 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %260, ptr %261, i64 %4) + %263 = getelementptr inbounds i32, ptr %1, i64 4480 + %264 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %262, ptr %263, i64 %4) + %265 = getelementptr inbounds i32, ptr %1, i64 4608 + %266 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %264, ptr %265, i64 %4) + %267 = getelementptr inbounds i32, ptr %1, i64 4736 + %268 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %266, ptr %267, i64 %4) + %269 = getelementptr inbounds i32, ptr %1, i64 4864 + %270 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %268, ptr %269, i64 %4) + %271 = getelementptr inbounds i32, ptr %1, i64 4992 + %272 = tail call 
@llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %270, ptr %271, i64 %4) + %273 = getelementptr inbounds i32, ptr %1, i64 5120 + %274 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %272, ptr %273, i64 %4) + %275 = getelementptr inbounds i32, ptr %1, i64 5248 + %276 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %274, ptr %275, i64 %4) + %277 = getelementptr inbounds i32, ptr %1, i64 5376 + %278 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %276, ptr %277, i64 %4) + %279 = getelementptr inbounds i32, ptr %1, i64 5504 + %280 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %278, ptr %279, i64 %4) + %281 = getelementptr inbounds i32, ptr %1, i64 5632 + %282 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %280, ptr %281, i64 %4) + %283 = getelementptr inbounds i32, ptr %1, i64 5760 + %284 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %282, ptr %283, i64 %4) + %285 = getelementptr inbounds i32, ptr %1, i64 5888 + %286 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %284, ptr %285, i64 %4) + %287 = getelementptr inbounds i32, ptr %1, i64 6016 + %288 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %286, ptr %287, i64 %4) + %289 = getelementptr inbounds i32, ptr %1, i64 6144 + %290 = tail call 
@llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %288, ptr %289, i64 %4) + %291 = getelementptr inbounds i32, ptr %1, i64 6272 + %292 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %290, ptr %291, i64 %4) + %293 = getelementptr inbounds i32, ptr %1, i64 6400 + %294 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %292, ptr %293, i64 %4) + %295 = getelementptr inbounds i32, ptr %1, i64 6528 + %296 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %294, ptr %295, i64 %4) + %297 = getelementptr inbounds i32, ptr %1, i64 6656 + %298 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %296, ptr %297, i64 %4) + %299 = getelementptr inbounds i32, ptr %1, i64 6784 + %300 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %298, ptr %299, i64 %4) + %301 = getelementptr inbounds i32, ptr %1, i64 6912 + %302 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %300, ptr %301, i64 %4) + %303 = getelementptr inbounds i32, ptr %1, i64 7040 + %304 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %302, ptr %303, i64 %4) + %305 = getelementptr inbounds i32, ptr %1, i64 7168 + %306 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %304, ptr %305, i64 %4) + %307 = getelementptr inbounds i32, ptr %1, i64 7296 + %308 = tail call 
@llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %306, ptr %307, i64 %4) + %309 = getelementptr inbounds i32, ptr %1, i64 7424 + %310 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %308, ptr %309, i64 %4) + %311 = getelementptr inbounds i32, ptr %1, i64 7552 + %312 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %310, ptr %311, i64 %4) + %313 = getelementptr inbounds i32, ptr %1, i64 7680 + %314 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %312, ptr %313, i64 %4) + %315 = getelementptr inbounds i32, ptr %1, i64 7808 + %316 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %314, ptr %315, i64 %4) + %317 = getelementptr inbounds i32, ptr %1, i64 7936 + %318 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %316, ptr %317, i64 %4) + %319 = getelementptr inbounds i32, ptr %1, i64 8064 + %320 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %318, ptr %319, i64 %4) + %321 = getelementptr inbounds i32, ptr %1, i64 8192 + %322 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %320, ptr %321, i64 %4) + %323 = getelementptr inbounds i32, ptr %1, i64 8320 + %324 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %322, ptr %323, i64 %4) + %325 = getelementptr inbounds i32, ptr %1, i64 8448 + %326 = tail call 
@llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %324, ptr %325, i64 %4) + %327 = getelementptr inbounds i32, ptr %1, i64 8576 + %328 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %326, ptr %327, i64 %4) + %329 = getelementptr inbounds i32, ptr %1, i64 8704 + %330 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %328, ptr %329, i64 %4) + %331 = getelementptr inbounds i32, ptr %1, i64 8832 + %332 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %330, ptr %331, i64 %4) + %333 = getelementptr inbounds i32, ptr %1, i64 8960 + %334 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %332, ptr %333, i64 %4) + %335 = getelementptr inbounds i32, ptr %1, i64 9088 + %336 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %334, ptr %335, i64 %4) + %337 = getelementptr inbounds i32, ptr %1, i64 9216 + %338 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %336, ptr %337, i64 %4) + %339 = getelementptr inbounds i32, ptr %1, i64 9344 + %340 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %338, ptr %339, i64 %4) + %341 = getelementptr inbounds i32, ptr %1, i64 9472 + %342 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %340, ptr %341, i64 %4) + %343 = getelementptr inbounds i32, ptr %1, i64 9600 + %344 = tail call 
@llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %342, ptr %343, i64 %4) + %345 = getelementptr inbounds i32, ptr %1, i64 9728 + %346 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %344, ptr %345, i64 %4) + %347 = getelementptr inbounds i32, ptr %1, i64 9856 + %348 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %346, ptr %347, i64 %4) + %349 = getelementptr inbounds i32, ptr %1, i64 9984 + %350 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %348, ptr %349, i64 %4) + %351 = getelementptr inbounds i32, ptr %1, i64 10112 + %352 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %350, ptr %351, i64 %4) + %353 = getelementptr inbounds i32, ptr %1, i64 10240 + %354 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %352, ptr %353, i64 %4) + %355 = getelementptr inbounds i32, ptr %1, i64 10368 + %356 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + tail call void @llvm.riscv.vse.nxv4i32.i64( %354, ptr %355, i64 %4) + %357 = tail call @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4) + ret void +} + +declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64) +declare @llvm.riscv.vle.nxv4i32.i64(, ptr, i64) +declare void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64, i64, , , i64) +declare @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64, i64, i64, i64) +declare void @llvm.riscv.vse.nxv4i32.i64(, ptr, i64)