From 9e143aade13bbf4b4eaa1c9a09d2563376222739 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra
Date: Wed, 19 Nov 2025 15:43:42 +0000
Subject: [PATCH 01/13] [ISel] Introduce llvm.clmul[rh] intrinsics

In line with a std proposal, introduce the llvm.clmul[rh] family of
intrinsics corresponding to carry-less multiply operations. This work
builds upon 727ee7e ([APInt] Introduce carry-less multiply primitives),
and follow-up patches will introduce custom lowering on supported
targets, replacing target-specific clmul intrinsics.

Testing is done on the RISC-V target, which should be sufficient to
prove that the intrinsic works, since no RISC-V-specific lowering has
been added.

Ref: https://isocpp.org/files/papers/P3642R3.html

Co-authored-by: Oscar Smith
---
 llvm/docs/LangRef.rst                         |   101 +-
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |     5 +
 llvm/include/llvm/CodeGen/TargetLowering.h    |     5 +
 llvm/include/llvm/IR/Intrinsics.td            |     4 +
 .../include/llvm/Target/TargetSelectionDAG.td |     4 +
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |     6 +
 .../SelectionDAG/LegalizeIntegerTypes.cpp     |    14 +
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |     1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |    12 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |     6 +
 .../SelectionDAG/SelectionDAGBuilder.cpp      |    16 +
 .../SelectionDAG/SelectionDAGDumper.cpp       |     3 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |    43 +
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |     3 +
 llvm/test/CodeGen/RISCV/clmul.ll              |  7582 +++++
 llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll   | 24188 ++++++++++++++++
 .../CodeGen/RISCV/rvv/fixed-vectors-clmul.ll  | 19366 +++++++++++++
 17 files changed, 51357 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/clmul.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 734778f73af5f..a33e2bdceafb8 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -18291,8 +18291,6 @@ then the result is the size in bits of the type of ``src`` if
 ``is_zero_poison == 0`` and ``poison`` otherwise.
 For example, ``llvm.cttz(2) = 1``.
 
-.. _int_overflow:
-
 .. _int_fshl:
 
 '``llvm.fshl.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -18389,6 +18387,105 @@ Example:
       %r = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)  ; %r = i8: 225 (0b11100001)
       %r = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)   ; %r = i8: 255 (0b11111111)
 
+.. _int_clmul:
+
+'``llvm.clmul.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.clmul`` on any integer
+bit width or any vector of integer elements.
+
+::
+
+      declare i16 @llvm.clmul.i16(i16 %a, i16 %b)
+      declare i32 @llvm.clmul.i32(i32 %a, i32 %b)
+      declare i64 @llvm.clmul.i64(i64 %a, i64 %b)
+      declare <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+Overview:
+"""""""""
+
+The '``llvm.clmul``' family of intrinsic functions performs carry-less
+multiplication, or XOR multiplication, on the two arguments, and returns
+the low bits of the result.
+
+Arguments:
+""""""""""
+
+The arguments may be of any integer type or vector of integer type. Both
+arguments and the result must have the same type.
+
+Semantics:
+""""""""""
+
+The '``llvm.clmul``' intrinsic computes the carry-less multiply of its
+arguments, which is the result of applying the standard long multiplication
+algorithm with all of the additions replaced by XORs, and returns the low
+bits. The vector variants operate lane-wise.
+
+Example:
+""""""""
+
+.. 
code-block:: llvm
+
+      %r = call i4 @llvm.clmul.i4(i4 1, i4 2)   ; %r = 2
+      %r = call i4 @llvm.clmul.i4(i4 5, i4 6)   ; %r = 14
+      %r = call i4 @llvm.clmul.i4(i4 -4, i4 2)  ; %r = -8
+      %r = call i4 @llvm.clmul.i4(i4 -4, i4 -5) ; %r = 4
+
+'``llvm.clmulr.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.clmulr`` on any integer
+bit width or any vector of integer elements.
+
+::
+
+      declare i16 @llvm.clmulr.i16(i16 %a, i16 %b)
+      declare i32 @llvm.clmulr.i32(i32 %a, i32 %b)
+      declare i64 @llvm.clmulr.i64(i64 %a, i64 %b)
+      declare <4 x i32> @llvm.clmulr.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+Overview:
+"""""""""
+
+The '``llvm.clmulr``' family of intrinsic functions performs reversed
+carry-less multiplication on the two arguments.
+
+Arguments:
+""""""""""
+
+The arguments may be of any integer type or vector of integer type. Both
+arguments and the result must have the same type.
+
+Semantics:
+""""""""""
+
+The '``llvm.clmulr``' intrinsic computes the reversed carry-less multiply of
+its arguments. The vector variants operate lane-wise.
+
+.. code-block:: text
+
+      clmulr(%a, %b) = bitreverse(clmul(bitreverse(%a), bitreverse(%b)))
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %r = call i4 @llvm.clmulr.i4(i4 1, i4 2)   ; %r = 0
+      %r = call i4 @llvm.clmulr.i4(i4 5, i4 6)   ; %r = 3
+      %r = call i4 @llvm.clmulr.i4(i4 -4, i4 2)  ; %r = 3
+      %r = call i4 @llvm.clmulr.i4(i4 -4, i4 -5) ; %r = -2
+
+.. _int_overflow:
+
 Arithmetic with Overflow Intrinsics
 -----------------------------------
 
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index cdaa916548c25..08d87f7e7b266 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -767,6 +767,11 @@ enum NodeType {
   FSHL,
   FSHR,
 
+  /// Carry-less multiplication operations.
+  CLMUL,
+  CLMULR,
+  CLMULH,
+
   /// Byte Swap and Counting operators.
   BSWAP,
   CTTZ,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7df5d8a09f0f6..4c904cffcafaa 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5457,6 +5457,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   /// \returns The expansion if successful, SDValue() otherwise
   SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;
 
+  /// Expand carry-less multiply.
+  /// \param N Node to expand
+  /// \returns The expansion if successful, SDValue() otherwise
+  SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const;
+
   /// Expand rotations.
/// \param N Node to expand /// \param AllowVectorOps expand vector rotate, this should only be performed diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 8f3cc54747074..fb8857cec2075 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1465,6 +1465,10 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_clmulr : DefaultAttrsIntrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>]>; } let IntrProperties = [IntrNoMem, IntrSpeculatable, diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index a9750a5ab03f9..6c5024845dc6d 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -441,6 +441,10 @@ def sra_parts : SDNode<"ISD::SRA_PARTS" , SDTIntShiftPairOp>; def srl_parts : SDNode<"ISD::SRL_PARTS" , SDTIntShiftPairOp>; def fshl : SDNode<"ISD::FSHL" , SDTIntShiftDOp>; def fshr : SDNode<"ISD::FSHR" , SDTIntShiftDOp>; +def clmul : SDNode<"ISD::CLMUL" , SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def clmulr : SDNode<"ISD::CLMULR" , SDTIntBinOp, [SDNPCommutative]>; +def clmulh : SDNode<"ISD::CLMULH" , SDTIntBinOp, [SDNPCommutative]>; def and : SDNode<"ISD::AND" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def or : SDNode<"ISD::OR" , SDTIntBinOp, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 99d14a60c6ed1..4e9cbbb85c129 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4095,6 +4095,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) Results.push_back(Expanded); break; + case ISD::CLMUL: + case ISD::CLMULR: + case ISD::CLMULH: + if (SDValue Expanded = TLI.expandCLMUL(Node, DAG)) + Results.push_back(Expanded); + break; case ISD::ROTL: case ISD::ROTR: if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG)) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 44e5a187c4281..ec3327c85f248 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -204,6 +204,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::SUB: case ISD::MUL: + case ISD::CLMUL: + case ISD::CLMULR: + case ISD::CLMULH: case ISD::VP_AND: case ISD::VP_OR: case ISD::VP_XOR: @@ -3162,6 +3165,12 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { ExpandIntRes_FunnelShift(N, Lo, Hi); break; + case ISD::CLMUL: + case ISD::CLMULR: + case ISD::CLMULH: + ExpandIntRes_CLMUL(N, Lo, Hi); + break; + case ISD::VSCALE: ExpandIntRes_VSCALE(N, Lo, Hi); break; @@ -5492,6 +5501,11 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo, Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt); } +void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Res = TLI.expandCLMUL(N, DAG); + SplitInteger(Res, Lo, Hi); +} + void 
DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index ede522eff6df3..ee9f519e249ed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -513,6 +513,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 10d5f7a9b4f65..7f077940cb2a5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -173,6 +173,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::CLMUL: + case ISD::CLMULR: + case ISD::CLMULH: case ISD::SADDSAT: case ISD::UADDSAT: @@ -1372,6 +1375,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::VP_ADD: case ISD::SUB: case ISD::VP_SUB: case ISD::MUL: case ISD::VP_MUL: + case ISD::CLMUL: + case ISD::CLMULR: + case ISD::CLMULH: case ISD::MULHS: case ISD::MULHU: case ISD::ABDS: @@ -4924,6 +4930,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::VP_SHL: case ISD::SRA: case ISD::VP_SRA: case ISD::SRL: case ISD::VP_SRL: + case ISD::CLMUL: + case ISD::CLMULR: + case ISD::CLMULH: case ISD::FMINNUM: case ISD::FMINNUM_IEEE: case ISD::VP_FMINNUM: @@ -7064,6 +7073,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::LLROUND: case ISD::LRINT: case ISD::LLRINT: + case ISD::CLMUL: + case ISD::CLMULR: + case ISD::CLMULH: Res = WidenVecOp_UnrollVectorOp(N); break; case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1b15a207a2d37..1e2c7c13a4457 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6789,6 +6789,12 @@ static std::optional FoldValue(unsigned Opcode, const APInt &C1, return APIntOps::mulhs(C1, C2); case ISD::MULHU: return APIntOps::mulhu(C1, C2); + case ISD::CLMUL: + return APIntOps::clmul(C1, C2); + case ISD::CLMULR: + return APIntOps::clmulr(C1, C2); + case ISD::CLMULH: + return APIntOps::clmulh(C1, C2); } return std::nullopt; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 985a54ca83256..428eaeb3a1dde 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7279,6 +7279,22 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } return; } + case Intrinsic::clmul: + case Intrinsic::clmulr: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + unsigned Opcode; + switch (Intrinsic) { + case Intrinsic::clmul: + Opcode = ISD::CLMUL; + break; + case Intrinsic::clmulr: + Opcode = ISD::CLMULR; + break; + } + setValue(&I, DAG.getNode(Opcode, sdl, 
Op1.getValueType(), Op1, Op2));
+    return;
+  }
   case Intrinsic::sadd_sat: {
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index ec5edd5f13978..d537236afb41c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -299,6 +299,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::ROTR: return "rotr";
   case ISD::FSHL: return "fshl";
   case ISD::FSHR: return "fshr";
+  case ISD::CLMUL: return "clmul";
+  case ISD::CLMULR: return "clmulr";
+  case ISD::CLMULH: return "clmulh";
   case ISD::FADD: return "fadd";
   case ISD::STRICT_FADD: return "strict_fadd";
   case ISD::FSUB: return "fsub";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5684e0e4c26c4..7db1dad5b4426 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8302,6 +8302,49 @@ SDValue TargetLowering::expandFunnelShift(SDNode *Node,
   return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
 }
 
+SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
+  SDLoc DL(Node);
+  EVT VT = Node->getValueType(0);
+  SDValue X = Node->getOperand(0);
+  SDValue Y = Node->getOperand(1);
+  unsigned BW = VT.getScalarSizeInBits();
+
+  if (VT.isVector() && isOperationLegalOrCustomOrPromote(
+                           Node->getOpcode(), VT.getVectorElementType()))
+    return DAG.UnrollVectorOp(Node);
+
+  SDValue Res = DAG.getConstant(0, DL, VT);
+  switch (Node->getOpcode()) {
+  case ISD::CLMUL: {
+    for (unsigned I = 0; I < BW; ++I) {
+      SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
+      SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
+      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
+      Res = DAG.getNode(ISD::XOR, DL, VT, Res, Mul);
+    }
+    break;
+  }
+  case ISD::CLMULR: {
+    SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
+    SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
+    SDValue ResR = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
+    Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ResR);
+    break;
+  }
+  case ISD::CLMULH: {
+    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 2 * BW);
+    SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
+    SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
+    SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
+    SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
+                                 DAG.getShiftAmountConstant(BW, VT, DL));
+    Res = DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
+    break;
+  }
+  }
+  return Res;
+}
+
 // TODO: Merge with expandFunnelShift.
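// ---------------------------------------------------------------------------
// Reviewer aid (illustration only, not part of the patch): the expansion
// above relies on three facts -- ISD::CLMUL is the XOR of the shifted copies
// of X selected by the set bits of Y, ISD::CLMULR is CLMUL conjugated by
// BITREVERSE, and ISD::CLMULH is the high half of the double-width CLMUL.
// The standalone sketch below (plain C++, hypothetical helper names) mirrors
// those definitions and can be used to cross-check the LangRef examples.
#include <cassert>
#include <cstdint>

// ISD::CLMUL: low half of the carry-less (XOR) product.
static uint32_t RefCLMul(uint32_t X, uint32_t Y) {
  uint32_t Res = 0;
  for (unsigned I = 0; I < 32; ++I)
    if (Y & (1u << I))
      Res ^= X << I; // partial products are combined with XOR, not ADD
  return Res;
}

static uint32_t RefBitReverse(uint32_t V) {
  uint32_t Res = 0;
  for (unsigned I = 0; I < 32; ++I)
    Res |= ((V >> I) & 1u) << (31 - I);
  return Res;
}

// ISD::CLMULR: bitreverse(clmul(bitreverse(X), bitreverse(Y))).
static uint32_t RefCLMulR(uint32_t X, uint32_t Y) {
  return RefBitReverse(RefCLMul(RefBitReverse(X), RefBitReverse(Y)));
}

// ISD::CLMULH: high half of the 64-bit carry-less product.
static uint32_t RefCLMulH(uint32_t X, uint32_t Y) {
  uint64_t Res = 0;
  for (unsigned I = 0; I < 32; ++I)
    if (Y & (1u << I))
      Res ^= static_cast<uint64_t>(X) << I;
  return static_cast<uint32_t>(Res >> 32);
}

int main() {
  // Mirrors the i4 examples from the LangRef change, masked down to 4 bits.
  assert((RefCLMul(5, 6) & 0xf) == 14);  // clmul(i4 5, i4 6)   == 14
  assert((RefCLMul(12, 11) & 0xf) == 4); // clmul(i4 -4, i4 -5) == 4
  // CLMULH equals CLMULR shifted right by one, since both are slices of the
  // same double-width product; handy as a sanity check of the two expansions.
  const uint32_t A = 0x89abcdefu, B = 0x12345678u;
  assert(RefCLMulH(A, B) == RefCLMulR(A, B) >> 1);
  return 0;
}
// ---------------------------------------------------------------------------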
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, SelectionDAG &DAG) const { diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index f9d727eaf1e20..9fd05236418b1 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -913,6 +913,9 @@ void TargetLoweringBase::initActions() { // Absolute difference setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand); + // Carry-less multiply + setOperationAction({ISD::CLMUL, ISD::CLMULR, ISD::CLMULH}, VT, Expand); + // Saturated trunc setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Expand); setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Expand); diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll new file mode 100644 index 0000000000000..1e3acd8ccce74 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/clmul.ll @@ -0,0 +1,7582 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32IM +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64IM + +define i4 @clmul_i4(i4 %a, i4 %b) nounwind { +; RV32IM-LABEL: clmul_i4: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -48 +; RV32IM-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t6, a1, 2 +; RV32IM-NEXT: andi s1, a1, 1 +; RV32IM-NEXT: andi a7, a1, 4 +; RV32IM-NEXT: andi t2, a1, 8 +; RV32IM-NEXT: andi t0, a1, 16 +; RV32IM-NEXT: andi t3, a1, 32 +; RV32IM-NEXT: andi a2, a1, 64 +; RV32IM-NEXT: andi t4, a1, 128 +; RV32IM-NEXT: andi s0, a1, 256 +; RV32IM-NEXT: andi a3, a1, 512 +; RV32IM-NEXT: li a4, 1 +; RV32IM-NEXT: lui a5, 1 +; RV32IM-NEXT: lui a6, 2 +; RV32IM-NEXT: lui t1, 4 +; RV32IM-NEXT: lui t5, 8 +; RV32IM-NEXT: lui s2, 16 +; RV32IM-NEXT: lui s3, 32 +; RV32IM-NEXT: lui s4, 64 +; RV32IM-NEXT: lui s5, 128 +; RV32IM-NEXT: lui s6, 256 +; RV32IM-NEXT: lui s7, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: mul t6, a0, t6 +; RV32IM-NEXT: mul s1, a0, s1 +; RV32IM-NEXT: xor t6, s1, t6 +; RV32IM-NEXT: lui s1, 8192 +; RV32IM-NEXT: mul a7, a0, a7 +; RV32IM-NEXT: mul t2, a0, t2 +; RV32IM-NEXT: xor a7, a7, t2 +; RV32IM-NEXT: lui t2, 16384 +; RV32IM-NEXT: mul t0, a0, t0 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: xor t0, t0, t3 +; RV32IM-NEXT: lui t3, 32768 +; RV32IM-NEXT: mul t4, a0, t4 +; RV32IM-NEXT: mul s0, a0, s0 +; RV32IM-NEXT: xor t4, t4, s0 +; RV32IM-NEXT: lui s0, 65536 +; RV32IM-NEXT: xor a7, t6, a7 +; RV32IM-NEXT: lui t6, 131072 +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: xor a2, t0, a2 +; RV32IM-NEXT: lui t0, 262144 +; RV32IM-NEXT: mul a3, a0, a3 +; RV32IM-NEXT: xor a3, t4, a3 +; RV32IM-NEXT: lui t4, 524288 +; RV32IM-NEXT: slli a4, a4, 11 +; RV32IM-NEXT: and a5, a1, a5 +; RV32IM-NEXT: and a6, a1, a6 +; RV32IM-NEXT: and t1, a1, t1 +; RV32IM-NEXT: and t5, a1, t5 +; RV32IM-NEXT: and s2, a1, s2 +; RV32IM-NEXT: and s3, 
a1, s3 +; RV32IM-NEXT: and s4, a1, s4 +; RV32IM-NEXT: and s5, a1, s5 +; RV32IM-NEXT: and s6, a1, s6 +; RV32IM-NEXT: and s7, a1, s7 +; RV32IM-NEXT: and s8, a1, s8 +; RV32IM-NEXT: and s9, a1, s9 +; RV32IM-NEXT: and s10, a1, s10 +; RV32IM-NEXT: and s1, a1, s1 +; RV32IM-NEXT: and t2, a1, t2 +; RV32IM-NEXT: and t3, a1, t3 +; RV32IM-NEXT: and s0, a1, s0 +; RV32IM-NEXT: and t6, a1, t6 +; RV32IM-NEXT: and t0, a1, t0 +; RV32IM-NEXT: and t4, a1, t4 +; RV32IM-NEXT: and a4, a1, a4 +; RV32IM-NEXT: andi a1, a1, 1024 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: mul a5, a0, a5 +; RV32IM-NEXT: mul a6, a0, a6 +; RV32IM-NEXT: mul t1, a0, t1 +; RV32IM-NEXT: mul t5, a0, t5 +; RV32IM-NEXT: mul s2, a0, s2 +; RV32IM-NEXT: mul s3, a0, s3 +; RV32IM-NEXT: mul s4, a0, s4 +; RV32IM-NEXT: mul s5, a0, s5 +; RV32IM-NEXT: mul s6, a0, s6 +; RV32IM-NEXT: mul s7, a0, s7 +; RV32IM-NEXT: mul s8, a0, s8 +; RV32IM-NEXT: mul s9, a0, s9 +; RV32IM-NEXT: mul s10, a0, s10 +; RV32IM-NEXT: mul s1, a0, s1 +; RV32IM-NEXT: mul t2, a0, t2 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: mul s0, a0, s0 +; RV32IM-NEXT: mul t6, a0, t6 +; RV32IM-NEXT: mul t0, a0, t0 +; RV32IM-NEXT: mul t4, a0, t4 +; RV32IM-NEXT: mul a0, a0, a4 +; RV32IM-NEXT: xor a4, t1, t5 +; RV32IM-NEXT: xor t1, s5, s6 +; RV32IM-NEXT: xor t2, s1, t2 +; RV32IM-NEXT: xor a2, a7, a2 +; RV32IM-NEXT: xor a1, a3, a1 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a3, a4, s2 +; RV32IM-NEXT: xor a4, t1, s7 +; RV32IM-NEXT: xor a5, t2, t3 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: xor a2, a3, s3 +; RV32IM-NEXT: xor a3, a4, s8 +; RV32IM-NEXT: xor a5, a5, s0 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, a2, s4 +; RV32IM-NEXT: xor a2, a3, s9 +; RV32IM-NEXT: xor a3, a5, t6 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a1, a2, s10 +; RV32IM-NEXT: xor a2, a3, t0 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a1, a2, t4 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 48 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmul_i4: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi t2, a1, 2 +; RV64IM-NEXT: andi t4, a1, 1 +; RV64IM-NEXT: andi a6, a1, 4 +; RV64IM-NEXT: andi t0, a1, 8 +; RV64IM-NEXT: andi a5, a1, 16 +; RV64IM-NEXT: andi a7, a1, 32 +; RV64IM-NEXT: andi 
a3, a1, 64 +; RV64IM-NEXT: andi t1, a1, 128 +; RV64IM-NEXT: andi t3, a1, 256 +; RV64IM-NEXT: andi a4, a1, 512 +; RV64IM-NEXT: li a2, 1 +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: lui t6, 2 +; RV64IM-NEXT: lui s0, 4 +; RV64IM-NEXT: lui s1, 8 +; RV64IM-NEXT: lui s2, 16 +; RV64IM-NEXT: lui s3, 32 +; RV64IM-NEXT: lui s4, 64 +; RV64IM-NEXT: lui s5, 128 +; RV64IM-NEXT: lui s6, 256 +; RV64IM-NEXT: lui s8, 512 +; RV64IM-NEXT: lui s9, 1024 +; RV64IM-NEXT: lui s10, 2048 +; RV64IM-NEXT: lui s11, 4096 +; RV64IM-NEXT: lui ra, 8192 +; RV64IM-NEXT: lui t5, 16384 +; RV64IM-NEXT: mul t2, a0, t2 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: xor t2, t4, t2 +; RV64IM-NEXT: lui t4, 32768 +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: mul t0, a0, t0 +; RV64IM-NEXT: xor a6, a6, t0 +; RV64IM-NEXT: lui t0, 65536 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: mul a7, a0, a7 +; RV64IM-NEXT: xor a5, a5, a7 +; RV64IM-NEXT: lui a7, 131072 +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: mul t3, a0, t3 +; RV64IM-NEXT: xor t1, t1, t3 +; RV64IM-NEXT: lui t3, 262144 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a6, t2, a6 +; RV64IM-NEXT: sd a6, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a6, a2, 11 +; RV64IM-NEXT: sd a6, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s7, a1, s7 +; RV64IM-NEXT: and a6, a1, t6 +; RV64IM-NEXT: sd a6, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: and s2, a1, s2 +; RV64IM-NEXT: and s3, a1, s3 +; RV64IM-NEXT: and a6, a1, s4 +; RV64IM-NEXT: sd a6, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, s5 +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: and s8, a1, s8 +; RV64IM-NEXT: and t6, a1, s9 +; RV64IM-NEXT: sd t6, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s10 +; RV64IM-NEXT: sd t6, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s11 +; RV64IM-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: and t5, a1, t5 +; RV64IM-NEXT: and t4, a1, t4 +; RV64IM-NEXT: sd t4, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, t3 +; RV64IM-NEXT: sd a7, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: sd a3, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a2, 32 +; RV64IM-NEXT: xor a3, t1, a4 +; RV64IM-NEXT: sd a3, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, a2, 33 +; RV64IM-NEXT: mul a3, a0, s0 +; RV64IM-NEXT: mul a4, a0, s1 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s0, a2, 34 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: mul a4, a0, t2 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s1, a2, 35 +; RV64IM-NEXT: mul a3, a0, t6 +; RV64IM-NEXT: mul a4, a0, t5 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, a2, 36 +; RV64IM-NEXT: slli t6, a2, 37 +; RV64IM-NEXT: slli s5, a2, 38 +; RV64IM-NEXT: slli s6, a2, 39 +; RV64IM-NEXT: slli s9, a2, 40 +; RV64IM-NEXT: slli s10, a2, 41 +; RV64IM-NEXT: slli s11, a2, 42 +; RV64IM-NEXT: slli ra, a2, 43 +; RV64IM-NEXT: slli a3, a2, 44 +; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 45 +; RV64IM-NEXT: sd a3, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 46 +; RV64IM-NEXT: 
sd a3, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 47 +; RV64IM-NEXT: sd a3, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 48 +; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 49 +; RV64IM-NEXT: sd a3, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 50 +; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 51 +; RV64IM-NEXT: sd a3, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 52 +; RV64IM-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 53 +; RV64IM-NEXT: sd a3, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 54 +; RV64IM-NEXT: sd a3, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, a2, 55 +; RV64IM-NEXT: slli t0, a2, 56 +; RV64IM-NEXT: slli a7, a2, 57 +; RV64IM-NEXT: slli a6, a2, 58 +; RV64IM-NEXT: slli a5, a2, 59 +; RV64IM-NEXT: slli a4, a2, 60 +; RV64IM-NEXT: slli a3, a2, 61 +; RV64IM-NEXT: slli a2, a2, 62 +; RV64IM-NEXT: ld t2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t3, a1, t2 +; RV64IM-NEXT: and t2, a1, t4 +; RV64IM-NEXT: sd t2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t4, a1, s4 +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: sd s1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, t5 +; RV64IM-NEXT: sd t2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, t6 +; RV64IM-NEXT: and t2, a1, s5 +; RV64IM-NEXT: sd t2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: sd t2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s4, a1, s9 +; RV64IM-NEXT: and s5, a1, s10 +; RV64IM-NEXT: and s6, a1, s11 +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, a1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, a1, t2 +; RV64IM-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and ra, a1, t2 +; RV64IM-NEXT: ld t2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, a1, t1 +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, a6 +; RV64IM-NEXT: and a5, a1, a5 +; RV64IM-NEXT: sd a5, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a4, a1, a4 +; RV64IM-NEXT: sd a4, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a1, a3 +; RV64IM-NEXT: sd a3, 
24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a2, a1, a2 +; RV64IM-NEXT: sd a2, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, a1, 1024 +; RV64IM-NEXT: srliw a3, a1, 31 +; RV64IM-NEXT: srli a1, a1, 63 +; RV64IM-NEXT: mul s9, a0, a2 +; RV64IM-NEXT: slli a3, a3, 31 +; RV64IM-NEXT: slli a1, a1, 63 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul a2, a0, s3 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s0, a0, s8 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a2 +; RV64IM-NEXT: ld a2, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a5, a0, t3 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s1 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s1, a0, a1 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul a1, a0, t6 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s10 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s10, a0, s11 +; RV64IM-NEXT: mul s11, a0, ra +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t3, a0, a1 +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded 
Reload +; RV64IM-NEXT: mul s3, a0, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: ld a4, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: ld a6, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: ld t1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: ld t6, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, t6 +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, s8 +; RV64IM-NEXT: ld s8, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s9, s8, s9 +; RV64IM-NEXT: xor a5, a5, s7 +; RV64IM-NEXT: ld s7, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, s7, s2 +; RV64IM-NEXT: ld s7, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s7, s0 +; RV64IM-NEXT: ld s7, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s7, t2 +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: xor t4, s1, s4 +; RV64IM-NEXT: xor s1, s10, s11 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: xor a0, a0, s9 +; RV64IM-NEXT: ld a2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s2, a5 +; RV64IM-NEXT: ld s2, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, s2 +; RV64IM-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, s2 +; RV64IM-NEXT: xor a7, a7, t5 +; RV64IM-NEXT: xor t4, t4, s5 +; RV64IM-NEXT: xor t5, s1, ra +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s0, a3 +; RV64IM-NEXT: ld a5, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: ld t2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t2 +; RV64IM-NEXT: xor t2, t4, s6 +; RV64IM-NEXT: xor t0, t5, t0 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: ld a4, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: ld a5, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: xor a7, t0, t3 +; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, a7, s3 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 152(sp) # 8-byte Folded 
Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a1, a1, t6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i4 @llvm.clmul.i4(i4 %a, i4 %b) + ret i4 %res +} + +define i8 @clmul_i8(i8 %a, i8 %b) nounwind { +; RV32IM-LABEL: clmul_i8: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -48 +; RV32IM-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t6, a1, 2 +; RV32IM-NEXT: andi s1, a1, 1 +; RV32IM-NEXT: andi a7, a1, 4 +; RV32IM-NEXT: andi t2, a1, 8 +; RV32IM-NEXT: andi t0, a1, 16 +; RV32IM-NEXT: andi t3, a1, 32 +; RV32IM-NEXT: andi a2, a1, 64 +; RV32IM-NEXT: andi t4, a1, 128 +; RV32IM-NEXT: andi s0, a1, 256 +; RV32IM-NEXT: andi a3, a1, 512 +; RV32IM-NEXT: li a4, 1 +; RV32IM-NEXT: lui a5, 1 +; RV32IM-NEXT: lui a6, 2 +; RV32IM-NEXT: lui t1, 4 +; RV32IM-NEXT: lui t5, 8 +; RV32IM-NEXT: lui s2, 16 +; RV32IM-NEXT: lui s3, 32 +; RV32IM-NEXT: lui s4, 64 +; RV32IM-NEXT: lui s5, 128 +; RV32IM-NEXT: lui s6, 256 +; RV32IM-NEXT: lui s7, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: mul t6, a0, t6 +; RV32IM-NEXT: mul s1, a0, s1 +; RV32IM-NEXT: xor t6, s1, t6 +; RV32IM-NEXT: lui s1, 8192 +; RV32IM-NEXT: mul a7, a0, a7 +; RV32IM-NEXT: mul t2, a0, t2 +; RV32IM-NEXT: xor a7, a7, t2 +; RV32IM-NEXT: lui t2, 16384 +; RV32IM-NEXT: mul t0, a0, t0 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: xor t0, t0, t3 +; RV32IM-NEXT: lui t3, 32768 +; RV32IM-NEXT: mul t4, a0, t4 +; RV32IM-NEXT: mul s0, a0, s0 +; RV32IM-NEXT: xor t4, t4, s0 +; RV32IM-NEXT: lui s0, 65536 +; RV32IM-NEXT: xor a7, t6, a7 +; RV32IM-NEXT: lui t6, 131072 +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: xor a2, t0, a2 +; RV32IM-NEXT: lui t0, 262144 +; RV32IM-NEXT: mul a3, a0, a3 +; RV32IM-NEXT: xor a3, t4, a3 +; RV32IM-NEXT: lui t4, 524288 +; RV32IM-NEXT: slli a4, a4, 11 +; RV32IM-NEXT: and a5, a1, a5 
+; RV32IM-NEXT: and a6, a1, a6 +; RV32IM-NEXT: and t1, a1, t1 +; RV32IM-NEXT: and t5, a1, t5 +; RV32IM-NEXT: and s2, a1, s2 +; RV32IM-NEXT: and s3, a1, s3 +; RV32IM-NEXT: and s4, a1, s4 +; RV32IM-NEXT: and s5, a1, s5 +; RV32IM-NEXT: and s6, a1, s6 +; RV32IM-NEXT: and s7, a1, s7 +; RV32IM-NEXT: and s8, a1, s8 +; RV32IM-NEXT: and s9, a1, s9 +; RV32IM-NEXT: and s10, a1, s10 +; RV32IM-NEXT: and s1, a1, s1 +; RV32IM-NEXT: and t2, a1, t2 +; RV32IM-NEXT: and t3, a1, t3 +; RV32IM-NEXT: and s0, a1, s0 +; RV32IM-NEXT: and t6, a1, t6 +; RV32IM-NEXT: and t0, a1, t0 +; RV32IM-NEXT: and t4, a1, t4 +; RV32IM-NEXT: and a4, a1, a4 +; RV32IM-NEXT: andi a1, a1, 1024 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: mul a5, a0, a5 +; RV32IM-NEXT: mul a6, a0, a6 +; RV32IM-NEXT: mul t1, a0, t1 +; RV32IM-NEXT: mul t5, a0, t5 +; RV32IM-NEXT: mul s2, a0, s2 +; RV32IM-NEXT: mul s3, a0, s3 +; RV32IM-NEXT: mul s4, a0, s4 +; RV32IM-NEXT: mul s5, a0, s5 +; RV32IM-NEXT: mul s6, a0, s6 +; RV32IM-NEXT: mul s7, a0, s7 +; RV32IM-NEXT: mul s8, a0, s8 +; RV32IM-NEXT: mul s9, a0, s9 +; RV32IM-NEXT: mul s10, a0, s10 +; RV32IM-NEXT: mul s1, a0, s1 +; RV32IM-NEXT: mul t2, a0, t2 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: mul s0, a0, s0 +; RV32IM-NEXT: mul t6, a0, t6 +; RV32IM-NEXT: mul t0, a0, t0 +; RV32IM-NEXT: mul t4, a0, t4 +; RV32IM-NEXT: mul a0, a0, a4 +; RV32IM-NEXT: xor a4, t1, t5 +; RV32IM-NEXT: xor t1, s5, s6 +; RV32IM-NEXT: xor t2, s1, t2 +; RV32IM-NEXT: xor a2, a7, a2 +; RV32IM-NEXT: xor a1, a3, a1 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a3, a4, s2 +; RV32IM-NEXT: xor a4, t1, s7 +; RV32IM-NEXT: xor a5, t2, t3 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: xor a2, a3, s3 +; RV32IM-NEXT: xor a3, a4, s8 +; RV32IM-NEXT: xor a5, a5, s0 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, a2, s4 +; RV32IM-NEXT: xor a2, a3, s9 +; RV32IM-NEXT: xor a3, a5, t6 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a1, a2, s10 +; RV32IM-NEXT: xor a2, a3, t0 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a1, a2, t4 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 48 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmul_i8: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi t2, a1, 2 +; RV64IM-NEXT: andi t4, a1, 
1 +; RV64IM-NEXT: andi a6, a1, 4 +; RV64IM-NEXT: andi t0, a1, 8 +; RV64IM-NEXT: andi a5, a1, 16 +; RV64IM-NEXT: andi a7, a1, 32 +; RV64IM-NEXT: andi a3, a1, 64 +; RV64IM-NEXT: andi t1, a1, 128 +; RV64IM-NEXT: andi t3, a1, 256 +; RV64IM-NEXT: andi a4, a1, 512 +; RV64IM-NEXT: li a2, 1 +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: lui t6, 2 +; RV64IM-NEXT: lui s0, 4 +; RV64IM-NEXT: lui s1, 8 +; RV64IM-NEXT: lui s2, 16 +; RV64IM-NEXT: lui s3, 32 +; RV64IM-NEXT: lui s4, 64 +; RV64IM-NEXT: lui s5, 128 +; RV64IM-NEXT: lui s6, 256 +; RV64IM-NEXT: lui s8, 512 +; RV64IM-NEXT: lui s9, 1024 +; RV64IM-NEXT: lui s10, 2048 +; RV64IM-NEXT: lui s11, 4096 +; RV64IM-NEXT: lui ra, 8192 +; RV64IM-NEXT: lui t5, 16384 +; RV64IM-NEXT: mul t2, a0, t2 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: xor t2, t4, t2 +; RV64IM-NEXT: lui t4, 32768 +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: mul t0, a0, t0 +; RV64IM-NEXT: xor a6, a6, t0 +; RV64IM-NEXT: lui t0, 65536 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: mul a7, a0, a7 +; RV64IM-NEXT: xor a5, a5, a7 +; RV64IM-NEXT: lui a7, 131072 +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: mul t3, a0, t3 +; RV64IM-NEXT: xor t1, t1, t3 +; RV64IM-NEXT: lui t3, 262144 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a6, t2, a6 +; RV64IM-NEXT: sd a6, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a6, a2, 11 +; RV64IM-NEXT: sd a6, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s7, a1, s7 +; RV64IM-NEXT: and a6, a1, t6 +; RV64IM-NEXT: sd a6, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: and s2, a1, s2 +; RV64IM-NEXT: and s3, a1, s3 +; RV64IM-NEXT: and a6, a1, s4 +; RV64IM-NEXT: sd a6, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, s5 +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: and s8, a1, s8 +; RV64IM-NEXT: and t6, a1, s9 +; RV64IM-NEXT: sd t6, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s10 +; RV64IM-NEXT: sd t6, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s11 +; RV64IM-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: and t5, a1, t5 +; RV64IM-NEXT: and t4, a1, t4 +; RV64IM-NEXT: sd t4, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, t3 +; RV64IM-NEXT: sd a7, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: sd a3, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a2, 32 +; RV64IM-NEXT: xor a3, t1, a4 +; RV64IM-NEXT: sd a3, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, a2, 33 +; RV64IM-NEXT: mul a3, a0, s0 +; RV64IM-NEXT: mul a4, a0, s1 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s0, a2, 34 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: mul a4, a0, t2 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s1, a2, 35 +; RV64IM-NEXT: mul a3, a0, t6 +; RV64IM-NEXT: mul a4, a0, t5 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, a2, 36 +; RV64IM-NEXT: slli t6, a2, 37 +; RV64IM-NEXT: slli s5, a2, 38 +; RV64IM-NEXT: slli s6, a2, 39 +; RV64IM-NEXT: slli s9, a2, 40 +; RV64IM-NEXT: slli s10, a2, 41 +; RV64IM-NEXT: slli s11, a2, 42 +; RV64IM-NEXT: slli ra, a2, 43 +; RV64IM-NEXT: slli a3, a2, 44 +; RV64IM-NEXT: sd a3, 208(sp) # 
8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 45 +; RV64IM-NEXT: sd a3, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 46 +; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 47 +; RV64IM-NEXT: sd a3, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 48 +; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 49 +; RV64IM-NEXT: sd a3, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 50 +; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 51 +; RV64IM-NEXT: sd a3, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 52 +; RV64IM-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 53 +; RV64IM-NEXT: sd a3, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 54 +; RV64IM-NEXT: sd a3, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, a2, 55 +; RV64IM-NEXT: slli t0, a2, 56 +; RV64IM-NEXT: slli a7, a2, 57 +; RV64IM-NEXT: slli a6, a2, 58 +; RV64IM-NEXT: slli a5, a2, 59 +; RV64IM-NEXT: slli a4, a2, 60 +; RV64IM-NEXT: slli a3, a2, 61 +; RV64IM-NEXT: slli a2, a2, 62 +; RV64IM-NEXT: ld t2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t3, a1, t2 +; RV64IM-NEXT: and t2, a1, t4 +; RV64IM-NEXT: sd t2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t4, a1, s4 +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: sd s1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, t5 +; RV64IM-NEXT: sd t2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, t6 +; RV64IM-NEXT: and t2, a1, s5 +; RV64IM-NEXT: sd t2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: sd t2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s4, a1, s9 +; RV64IM-NEXT: and s5, a1, s10 +; RV64IM-NEXT: and s6, a1, s11 +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, a1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, a1, t2 +; RV64IM-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and ra, a1, t2 +; RV64IM-NEXT: ld t2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, a1, t1 +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, a6 +; RV64IM-NEXT: and a5, a1, a5 +; RV64IM-NEXT: sd a5, 40(sp) # 
8-byte Folded Spill +; RV64IM-NEXT: and a4, a1, a4 +; RV64IM-NEXT: sd a4, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a1, a3 +; RV64IM-NEXT: sd a3, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a2, a1, a2 +; RV64IM-NEXT: sd a2, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, a1, 1024 +; RV64IM-NEXT: srliw a3, a1, 31 +; RV64IM-NEXT: srli a1, a1, 63 +; RV64IM-NEXT: mul s9, a0, a2 +; RV64IM-NEXT: slli a3, a3, 31 +; RV64IM-NEXT: slli a1, a1, 63 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul a2, a0, s3 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s0, a0, s8 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a2 +; RV64IM-NEXT: ld a2, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a5, a0, t3 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s1 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s1, a0, a1 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul a1, a0, t6 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s10 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s10, a0, s11 +; RV64IM-NEXT: mul s11, a0, ra +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t3, a0, a1 +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s3, a0, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: ld a4, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: ld a6, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: ld t1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: ld t6, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, t6 +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, s8 +; RV64IM-NEXT: ld s8, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s9, s8, s9 +; RV64IM-NEXT: xor a5, a5, s7 +; RV64IM-NEXT: ld s7, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, s7, s2 +; RV64IM-NEXT: ld s7, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s7, s0 +; RV64IM-NEXT: ld s7, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s7, t2 +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: xor t4, s1, s4 +; RV64IM-NEXT: xor s1, s10, s11 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: xor a0, a0, s9 +; RV64IM-NEXT: ld a2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s2, a5 +; RV64IM-NEXT: ld s2, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, s2 +; RV64IM-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, s2 +; RV64IM-NEXT: xor a7, a7, t5 +; RV64IM-NEXT: xor t4, t4, s5 +; RV64IM-NEXT: xor t5, s1, ra +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s0, a3 +; RV64IM-NEXT: ld a5, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: ld t2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t2 +; RV64IM-NEXT: xor t2, t4, s6 +; RV64IM-NEXT: xor t0, t5, t0 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: ld a4, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: ld a5, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: xor a7, t0, t3 +; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, a7, s3 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a1, a1, t6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i8 @llvm.clmul.i8(i8 %a, i8 %b) + ret i8 %res +} + +define i16 @clmul_i16(i16 %a, i16 %b) nounwind { +; RV32IM-LABEL: clmul_i16: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -48 +; RV32IM-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t6, a1, 2 +; RV32IM-NEXT: andi s1, a1, 1 +; RV32IM-NEXT: andi a7, a1, 4 +; RV32IM-NEXT: andi t2, a1, 8 +; RV32IM-NEXT: andi t0, a1, 16 +; RV32IM-NEXT: andi t3, a1, 32 +; RV32IM-NEXT: andi a2, a1, 64 +; RV32IM-NEXT: andi t4, a1, 128 +; RV32IM-NEXT: andi s0, a1, 256 +; RV32IM-NEXT: andi a3, a1, 512 +; RV32IM-NEXT: li a4, 1 +; RV32IM-NEXT: lui a5, 1 +; RV32IM-NEXT: lui a6, 2 +; RV32IM-NEXT: lui t1, 4 +; RV32IM-NEXT: lui t5, 8 +; RV32IM-NEXT: lui s2, 16 +; RV32IM-NEXT: lui s3, 32 +; RV32IM-NEXT: lui s4, 64 +; RV32IM-NEXT: lui s5, 128 +; RV32IM-NEXT: lui s6, 256 +; RV32IM-NEXT: lui s7, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: mul t6, a0, t6 +; RV32IM-NEXT: mul s1, a0, s1 +; RV32IM-NEXT: xor t6, s1, t6 +; RV32IM-NEXT: lui s1, 8192 +; RV32IM-NEXT: mul a7, a0, a7 +; RV32IM-NEXT: mul t2, a0, t2 +; RV32IM-NEXT: xor a7, a7, t2 +; RV32IM-NEXT: lui t2, 16384 +; RV32IM-NEXT: mul t0, a0, t0 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: xor t0, t0, t3 +; RV32IM-NEXT: lui t3, 32768 +; RV32IM-NEXT: mul t4, a0, t4 +; RV32IM-NEXT: mul s0, a0, s0 +; RV32IM-NEXT: xor t4, t4, s0 +; RV32IM-NEXT: lui s0, 65536 +; RV32IM-NEXT: xor a7, t6, a7 +; RV32IM-NEXT: lui t6, 131072 +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: xor a2, t0, a2 +; RV32IM-NEXT: lui 
t0, 262144 +; RV32IM-NEXT: mul a3, a0, a3 +; RV32IM-NEXT: xor a3, t4, a3 +; RV32IM-NEXT: lui t4, 524288 +; RV32IM-NEXT: slli a4, a4, 11 +; RV32IM-NEXT: and a5, a1, a5 +; RV32IM-NEXT: and a6, a1, a6 +; RV32IM-NEXT: and t1, a1, t1 +; RV32IM-NEXT: and t5, a1, t5 +; RV32IM-NEXT: and s2, a1, s2 +; RV32IM-NEXT: and s3, a1, s3 +; RV32IM-NEXT: and s4, a1, s4 +; RV32IM-NEXT: and s5, a1, s5 +; RV32IM-NEXT: and s6, a1, s6 +; RV32IM-NEXT: and s7, a1, s7 +; RV32IM-NEXT: and s8, a1, s8 +; RV32IM-NEXT: and s9, a1, s9 +; RV32IM-NEXT: and s10, a1, s10 +; RV32IM-NEXT: and s1, a1, s1 +; RV32IM-NEXT: and t2, a1, t2 +; RV32IM-NEXT: and t3, a1, t3 +; RV32IM-NEXT: and s0, a1, s0 +; RV32IM-NEXT: and t6, a1, t6 +; RV32IM-NEXT: and t0, a1, t0 +; RV32IM-NEXT: and t4, a1, t4 +; RV32IM-NEXT: and a4, a1, a4 +; RV32IM-NEXT: andi a1, a1, 1024 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: mul a5, a0, a5 +; RV32IM-NEXT: mul a6, a0, a6 +; RV32IM-NEXT: mul t1, a0, t1 +; RV32IM-NEXT: mul t5, a0, t5 +; RV32IM-NEXT: mul s2, a0, s2 +; RV32IM-NEXT: mul s3, a0, s3 +; RV32IM-NEXT: mul s4, a0, s4 +; RV32IM-NEXT: mul s5, a0, s5 +; RV32IM-NEXT: mul s6, a0, s6 +; RV32IM-NEXT: mul s7, a0, s7 +; RV32IM-NEXT: mul s8, a0, s8 +; RV32IM-NEXT: mul s9, a0, s9 +; RV32IM-NEXT: mul s10, a0, s10 +; RV32IM-NEXT: mul s1, a0, s1 +; RV32IM-NEXT: mul t2, a0, t2 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: mul s0, a0, s0 +; RV32IM-NEXT: mul t6, a0, t6 +; RV32IM-NEXT: mul t0, a0, t0 +; RV32IM-NEXT: mul t4, a0, t4 +; RV32IM-NEXT: mul a0, a0, a4 +; RV32IM-NEXT: xor a4, t1, t5 +; RV32IM-NEXT: xor t1, s5, s6 +; RV32IM-NEXT: xor t2, s1, t2 +; RV32IM-NEXT: xor a2, a7, a2 +; RV32IM-NEXT: xor a1, a3, a1 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a3, a4, s2 +; RV32IM-NEXT: xor a4, t1, s7 +; RV32IM-NEXT: xor a5, t2, t3 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: xor a2, a3, s3 +; RV32IM-NEXT: xor a3, a4, s8 +; RV32IM-NEXT: xor a5, a5, s0 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, a2, s4 +; RV32IM-NEXT: xor a2, a3, s9 +; RV32IM-NEXT: xor a3, a5, t6 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a1, a2, s10 +; RV32IM-NEXT: xor a2, a3, t0 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a1, a2, t4 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 48 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmul_i16: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill 
+; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi t2, a1, 2 +; RV64IM-NEXT: andi t4, a1, 1 +; RV64IM-NEXT: andi a6, a1, 4 +; RV64IM-NEXT: andi t0, a1, 8 +; RV64IM-NEXT: andi a5, a1, 16 +; RV64IM-NEXT: andi a7, a1, 32 +; RV64IM-NEXT: andi a3, a1, 64 +; RV64IM-NEXT: andi t1, a1, 128 +; RV64IM-NEXT: andi t3, a1, 256 +; RV64IM-NEXT: andi a4, a1, 512 +; RV64IM-NEXT: li a2, 1 +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: lui t6, 2 +; RV64IM-NEXT: lui s0, 4 +; RV64IM-NEXT: lui s1, 8 +; RV64IM-NEXT: lui s2, 16 +; RV64IM-NEXT: lui s3, 32 +; RV64IM-NEXT: lui s4, 64 +; RV64IM-NEXT: lui s5, 128 +; RV64IM-NEXT: lui s6, 256 +; RV64IM-NEXT: lui s8, 512 +; RV64IM-NEXT: lui s9, 1024 +; RV64IM-NEXT: lui s10, 2048 +; RV64IM-NEXT: lui s11, 4096 +; RV64IM-NEXT: lui ra, 8192 +; RV64IM-NEXT: lui t5, 16384 +; RV64IM-NEXT: mul t2, a0, t2 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: xor t2, t4, t2 +; RV64IM-NEXT: lui t4, 32768 +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: mul t0, a0, t0 +; RV64IM-NEXT: xor a6, a6, t0 +; RV64IM-NEXT: lui t0, 65536 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: mul a7, a0, a7 +; RV64IM-NEXT: xor a5, a5, a7 +; RV64IM-NEXT: lui a7, 131072 +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: mul t3, a0, t3 +; RV64IM-NEXT: xor t1, t1, t3 +; RV64IM-NEXT: lui t3, 262144 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a6, t2, a6 +; RV64IM-NEXT: sd a6, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a6, a2, 11 +; RV64IM-NEXT: sd a6, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s7, a1, s7 +; RV64IM-NEXT: and a6, a1, t6 +; RV64IM-NEXT: sd a6, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: and s2, a1, s2 +; RV64IM-NEXT: and s3, a1, s3 +; RV64IM-NEXT: and a6, a1, s4 +; RV64IM-NEXT: sd a6, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, s5 +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: and s8, a1, s8 +; RV64IM-NEXT: and t6, a1, s9 +; RV64IM-NEXT: sd t6, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s10 +; RV64IM-NEXT: sd t6, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s11 +; RV64IM-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: and t5, a1, t5 +; RV64IM-NEXT: and t4, a1, t4 +; RV64IM-NEXT: sd t4, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, t3 +; RV64IM-NEXT: sd a7, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: sd a3, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a2, 32 +; RV64IM-NEXT: xor a3, t1, a4 +; RV64IM-NEXT: sd a3, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, a2, 33 +; RV64IM-NEXT: mul a3, a0, s0 +; RV64IM-NEXT: mul a4, a0, s1 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s0, a2, 34 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: mul a4, a0, t2 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s1, a2, 35 +; RV64IM-NEXT: mul a3, a0, t6 +; RV64IM-NEXT: mul a4, a0, t5 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, a2, 36 +; RV64IM-NEXT: slli t6, a2, 37 +; RV64IM-NEXT: slli s5, a2, 38 +; RV64IM-NEXT: slli s6, a2, 39 +; RV64IM-NEXT: slli s9, a2, 
40 +; RV64IM-NEXT: slli s10, a2, 41 +; RV64IM-NEXT: slli s11, a2, 42 +; RV64IM-NEXT: slli ra, a2, 43 +; RV64IM-NEXT: slli a3, a2, 44 +; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 45 +; RV64IM-NEXT: sd a3, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 46 +; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 47 +; RV64IM-NEXT: sd a3, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 48 +; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 49 +; RV64IM-NEXT: sd a3, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 50 +; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 51 +; RV64IM-NEXT: sd a3, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 52 +; RV64IM-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 53 +; RV64IM-NEXT: sd a3, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 54 +; RV64IM-NEXT: sd a3, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, a2, 55 +; RV64IM-NEXT: slli t0, a2, 56 +; RV64IM-NEXT: slli a7, a2, 57 +; RV64IM-NEXT: slli a6, a2, 58 +; RV64IM-NEXT: slli a5, a2, 59 +; RV64IM-NEXT: slli a4, a2, 60 +; RV64IM-NEXT: slli a3, a2, 61 +; RV64IM-NEXT: slli a2, a2, 62 +; RV64IM-NEXT: ld t2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t3, a1, t2 +; RV64IM-NEXT: and t2, a1, t4 +; RV64IM-NEXT: sd t2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t4, a1, s4 +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: sd s1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, t5 +; RV64IM-NEXT: sd t2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, t6 +; RV64IM-NEXT: and t2, a1, s5 +; RV64IM-NEXT: sd t2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: sd t2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s4, a1, s9 +; RV64IM-NEXT: and s5, a1, s10 +; RV64IM-NEXT: and s6, a1, s11 +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, a1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, a1, t2 +; RV64IM-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and ra, a1, t2 +; RV64IM-NEXT: ld t2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, a1, t1 +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: 
and a7, a1, a7 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, a6 +; RV64IM-NEXT: and a5, a1, a5 +; RV64IM-NEXT: sd a5, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a4, a1, a4 +; RV64IM-NEXT: sd a4, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a1, a3 +; RV64IM-NEXT: sd a3, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a2, a1, a2 +; RV64IM-NEXT: sd a2, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, a1, 1024 +; RV64IM-NEXT: srliw a3, a1, 31 +; RV64IM-NEXT: srli a1, a1, 63 +; RV64IM-NEXT: mul s9, a0, a2 +; RV64IM-NEXT: slli a3, a3, 31 +; RV64IM-NEXT: slli a1, a1, 63 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul a2, a0, s3 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s0, a0, s8 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a2 +; RV64IM-NEXT: ld a2, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a5, a0, t3 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s1 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s1, a0, a1 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul a1, a0, t6 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s10 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s10, a0, s11 +; RV64IM-NEXT: mul 
s11, a0, ra +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t3, a0, a1 +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s3, a0, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: ld a4, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: ld a6, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: ld t1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: ld t6, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, t6 +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, s8 +; RV64IM-NEXT: ld s8, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s9, s8, s9 +; RV64IM-NEXT: xor a5, a5, s7 +; RV64IM-NEXT: ld s7, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, s7, s2 +; RV64IM-NEXT: ld s7, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s7, s0 +; RV64IM-NEXT: ld s7, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s7, t2 +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: xor t4, s1, s4 +; RV64IM-NEXT: xor s1, s10, s11 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: xor a0, a0, s9 +; RV64IM-NEXT: ld a2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s2, a5 +; RV64IM-NEXT: ld s2, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, s2 +; RV64IM-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, s2 +; RV64IM-NEXT: xor a7, a7, t5 +; RV64IM-NEXT: xor t4, t4, s5 +; RV64IM-NEXT: xor t5, s1, ra +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s0, a3 +; RV64IM-NEXT: ld a5, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: ld t2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t2 +; RV64IM-NEXT: xor t2, t4, s6 +; RV64IM-NEXT: xor t0, t5, t0 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: ld a4, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: ld a5, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: xor a7, t0, t3 +; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, 
a4 +; RV64IM-NEXT: xor a5, a7, s3 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a1, a1, t6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i16 @llvm.clmul.i16(i16 %a, i16 %b) + ret i16 %res +} + +define i32 @clmul_i32(i32 %a, i32 %b) nounwind { +; RV32IM-LABEL: clmul_i32: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -48 +; RV32IM-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t6, a1, 2 +; RV32IM-NEXT: andi s1, a1, 1 +; RV32IM-NEXT: andi a7, a1, 4 +; RV32IM-NEXT: andi t2, a1, 8 +; RV32IM-NEXT: andi t0, a1, 16 +; RV32IM-NEXT: andi t3, a1, 32 +; RV32IM-NEXT: andi a2, a1, 64 +; RV32IM-NEXT: andi t4, a1, 128 +; RV32IM-NEXT: andi s0, a1, 256 +; RV32IM-NEXT: andi a3, a1, 512 +; RV32IM-NEXT: li a4, 1 +; RV32IM-NEXT: lui a5, 1 +; RV32IM-NEXT: lui a6, 2 +; RV32IM-NEXT: lui t1, 4 +; RV32IM-NEXT: lui t5, 8 +; RV32IM-NEXT: lui s2, 16 +; RV32IM-NEXT: lui s3, 32 +; RV32IM-NEXT: lui s4, 64 +; RV32IM-NEXT: lui s5, 128 +; RV32IM-NEXT: lui s6, 256 +; RV32IM-NEXT: lui s7, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: mul t6, a0, t6 +; RV32IM-NEXT: mul s1, a0, s1 +; RV32IM-NEXT: xor t6, s1, t6 +; RV32IM-NEXT: lui s1, 8192 +; RV32IM-NEXT: mul a7, a0, a7 +; RV32IM-NEXT: mul t2, a0, t2 +; RV32IM-NEXT: xor a7, a7, t2 +; RV32IM-NEXT: lui t2, 16384 +; RV32IM-NEXT: mul t0, a0, t0 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: xor t0, t0, t3 +; RV32IM-NEXT: lui t3, 32768 +; RV32IM-NEXT: mul t4, a0, t4 +; RV32IM-NEXT: mul s0, a0, s0 +; RV32IM-NEXT: xor t4, t4, s0 +; RV32IM-NEXT: 
lui s0, 65536 +; RV32IM-NEXT: xor a7, t6, a7 +; RV32IM-NEXT: lui t6, 131072 +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: xor a2, t0, a2 +; RV32IM-NEXT: lui t0, 262144 +; RV32IM-NEXT: mul a3, a0, a3 +; RV32IM-NEXT: xor a3, t4, a3 +; RV32IM-NEXT: lui t4, 524288 +; RV32IM-NEXT: slli a4, a4, 11 +; RV32IM-NEXT: and a5, a1, a5 +; RV32IM-NEXT: and a6, a1, a6 +; RV32IM-NEXT: and t1, a1, t1 +; RV32IM-NEXT: and t5, a1, t5 +; RV32IM-NEXT: and s2, a1, s2 +; RV32IM-NEXT: and s3, a1, s3 +; RV32IM-NEXT: and s4, a1, s4 +; RV32IM-NEXT: and s5, a1, s5 +; RV32IM-NEXT: and s6, a1, s6 +; RV32IM-NEXT: and s7, a1, s7 +; RV32IM-NEXT: and s8, a1, s8 +; RV32IM-NEXT: and s9, a1, s9 +; RV32IM-NEXT: and s10, a1, s10 +; RV32IM-NEXT: and s1, a1, s1 +; RV32IM-NEXT: and t2, a1, t2 +; RV32IM-NEXT: and t3, a1, t3 +; RV32IM-NEXT: and s0, a1, s0 +; RV32IM-NEXT: and t6, a1, t6 +; RV32IM-NEXT: and t0, a1, t0 +; RV32IM-NEXT: and t4, a1, t4 +; RV32IM-NEXT: and a4, a1, a4 +; RV32IM-NEXT: andi a1, a1, 1024 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: mul a5, a0, a5 +; RV32IM-NEXT: mul a6, a0, a6 +; RV32IM-NEXT: mul t1, a0, t1 +; RV32IM-NEXT: mul t5, a0, t5 +; RV32IM-NEXT: mul s2, a0, s2 +; RV32IM-NEXT: mul s3, a0, s3 +; RV32IM-NEXT: mul s4, a0, s4 +; RV32IM-NEXT: mul s5, a0, s5 +; RV32IM-NEXT: mul s6, a0, s6 +; RV32IM-NEXT: mul s7, a0, s7 +; RV32IM-NEXT: mul s8, a0, s8 +; RV32IM-NEXT: mul s9, a0, s9 +; RV32IM-NEXT: mul s10, a0, s10 +; RV32IM-NEXT: mul s1, a0, s1 +; RV32IM-NEXT: mul t2, a0, t2 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: mul s0, a0, s0 +; RV32IM-NEXT: mul t6, a0, t6 +; RV32IM-NEXT: mul t0, a0, t0 +; RV32IM-NEXT: mul t4, a0, t4 +; RV32IM-NEXT: mul a0, a0, a4 +; RV32IM-NEXT: xor a4, t1, t5 +; RV32IM-NEXT: xor t1, s5, s6 +; RV32IM-NEXT: xor t2, s1, t2 +; RV32IM-NEXT: xor a2, a7, a2 +; RV32IM-NEXT: xor a1, a3, a1 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a3, a4, s2 +; RV32IM-NEXT: xor a4, t1, s7 +; RV32IM-NEXT: xor a5, t2, t3 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: xor a2, a3, s3 +; RV32IM-NEXT: xor a3, a4, s8 +; RV32IM-NEXT: xor a5, a5, s0 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, a2, s4 +; RV32IM-NEXT: xor a2, a3, s9 +; RV32IM-NEXT: xor a3, a5, t6 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a1, a2, s10 +; RV32IM-NEXT: xor a2, a3, t0 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a1, a2, t4 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 48 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmul_i32: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill 
+; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi t2, a1, 2 +; RV64IM-NEXT: andi t4, a1, 1 +; RV64IM-NEXT: andi a6, a1, 4 +; RV64IM-NEXT: andi t0, a1, 8 +; RV64IM-NEXT: andi a5, a1, 16 +; RV64IM-NEXT: andi a7, a1, 32 +; RV64IM-NEXT: andi a3, a1, 64 +; RV64IM-NEXT: andi t1, a1, 128 +; RV64IM-NEXT: andi t3, a1, 256 +; RV64IM-NEXT: andi a4, a1, 512 +; RV64IM-NEXT: li a2, 1 +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: lui t6, 2 +; RV64IM-NEXT: lui s0, 4 +; RV64IM-NEXT: lui s1, 8 +; RV64IM-NEXT: lui s2, 16 +; RV64IM-NEXT: lui s3, 32 +; RV64IM-NEXT: lui s4, 64 +; RV64IM-NEXT: lui s5, 128 +; RV64IM-NEXT: lui s6, 256 +; RV64IM-NEXT: lui s8, 512 +; RV64IM-NEXT: lui s9, 1024 +; RV64IM-NEXT: lui s10, 2048 +; RV64IM-NEXT: lui s11, 4096 +; RV64IM-NEXT: lui ra, 8192 +; RV64IM-NEXT: lui t5, 16384 +; RV64IM-NEXT: mul t2, a0, t2 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: xor t2, t4, t2 +; RV64IM-NEXT: lui t4, 32768 +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: mul t0, a0, t0 +; RV64IM-NEXT: xor a6, a6, t0 +; RV64IM-NEXT: lui t0, 65536 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: mul a7, a0, a7 +; RV64IM-NEXT: xor a5, a5, a7 +; RV64IM-NEXT: lui a7, 131072 +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: mul t3, a0, t3 +; RV64IM-NEXT: xor t1, t1, t3 +; RV64IM-NEXT: lui t3, 262144 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a6, t2, a6 +; RV64IM-NEXT: sd a6, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a6, a2, 11 +; RV64IM-NEXT: sd a6, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s7, a1, s7 +; RV64IM-NEXT: and a6, a1, t6 +; RV64IM-NEXT: sd a6, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: and s2, a1, s2 +; RV64IM-NEXT: and s3, a1, s3 +; RV64IM-NEXT: and a6, a1, s4 +; RV64IM-NEXT: sd a6, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, s5 +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: and s8, a1, s8 +; RV64IM-NEXT: and t6, a1, s9 +; RV64IM-NEXT: sd t6, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s10 +; RV64IM-NEXT: sd t6, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s11 +; RV64IM-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: and t5, a1, t5 +; RV64IM-NEXT: and t4, a1, t4 +; RV64IM-NEXT: sd t4, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, t3 +; RV64IM-NEXT: sd a7, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: sd a3, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a2, 32 +; RV64IM-NEXT: xor a3, t1, a4 +; RV64IM-NEXT: sd a3, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, a2, 33 +; RV64IM-NEXT: mul a3, a0, s0 +; RV64IM-NEXT: mul a4, a0, s1 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s0, a2, 34 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: mul a4, a0, t2 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s1, a2, 35 +; RV64IM-NEXT: mul a3, a0, t6 +; RV64IM-NEXT: mul a4, a0, t5 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill 
+; RV64IM-NEXT: slli t5, a2, 36 +; RV64IM-NEXT: slli t6, a2, 37 +; RV64IM-NEXT: slli s5, a2, 38 +; RV64IM-NEXT: slli s6, a2, 39 +; RV64IM-NEXT: slli s9, a2, 40 +; RV64IM-NEXT: slli s10, a2, 41 +; RV64IM-NEXT: slli s11, a2, 42 +; RV64IM-NEXT: slli ra, a2, 43 +; RV64IM-NEXT: slli a3, a2, 44 +; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 45 +; RV64IM-NEXT: sd a3, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 46 +; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 47 +; RV64IM-NEXT: sd a3, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 48 +; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 49 +; RV64IM-NEXT: sd a3, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 50 +; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 51 +; RV64IM-NEXT: sd a3, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 52 +; RV64IM-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 53 +; RV64IM-NEXT: sd a3, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 54 +; RV64IM-NEXT: sd a3, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, a2, 55 +; RV64IM-NEXT: slli t0, a2, 56 +; RV64IM-NEXT: slli a7, a2, 57 +; RV64IM-NEXT: slli a6, a2, 58 +; RV64IM-NEXT: slli a5, a2, 59 +; RV64IM-NEXT: slli a4, a2, 60 +; RV64IM-NEXT: slli a3, a2, 61 +; RV64IM-NEXT: slli a2, a2, 62 +; RV64IM-NEXT: ld t2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t3, a1, t2 +; RV64IM-NEXT: and t2, a1, t4 +; RV64IM-NEXT: sd t2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t4, a1, s4 +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: sd s1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, t5 +; RV64IM-NEXT: sd t2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, t6 +; RV64IM-NEXT: and t2, a1, s5 +; RV64IM-NEXT: sd t2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: sd t2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s4, a1, s9 +; RV64IM-NEXT: and s5, a1, s10 +; RV64IM-NEXT: and s6, a1, s11 +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, a1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, a1, t2 +; RV64IM-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and ra, a1, t2 +; RV64IM-NEXT: ld t2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 
128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, a1, t1 +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, a6 +; RV64IM-NEXT: and a5, a1, a5 +; RV64IM-NEXT: sd a5, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a4, a1, a4 +; RV64IM-NEXT: sd a4, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a1, a3 +; RV64IM-NEXT: sd a3, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a2, a1, a2 +; RV64IM-NEXT: sd a2, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, a1, 1024 +; RV64IM-NEXT: srliw a3, a1, 31 +; RV64IM-NEXT: srli a1, a1, 63 +; RV64IM-NEXT: mul s9, a0, a2 +; RV64IM-NEXT: slli a3, a3, 31 +; RV64IM-NEXT: slli a1, a1, 63 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul a2, a0, s3 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s0, a0, s8 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a2 +; RV64IM-NEXT: ld a2, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a5, a0, t3 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s1 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s1, a0, a1 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul a1, a0, t6 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 232(sp) 
# 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s10 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s10, a0, s11 +; RV64IM-NEXT: mul s11, a0, ra +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t3, a0, a1 +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s3, a0, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: ld a4, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: ld a6, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: ld t1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: ld t6, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, t6 +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, s8 +; RV64IM-NEXT: ld s8, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s9, s8, s9 +; RV64IM-NEXT: xor a5, a5, s7 +; RV64IM-NEXT: ld s7, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, s7, s2 +; RV64IM-NEXT: ld s7, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s7, s0 +; RV64IM-NEXT: ld s7, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s7, t2 +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: xor t4, s1, s4 +; RV64IM-NEXT: xor s1, s10, s11 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: xor a0, a0, s9 +; RV64IM-NEXT: ld a2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s2, a5 +; RV64IM-NEXT: ld s2, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, s2 +; RV64IM-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, s2 +; RV64IM-NEXT: xor a7, a7, t5 +; RV64IM-NEXT: xor t4, t4, s5 +; RV64IM-NEXT: xor t5, s1, ra +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s0, a3 +; RV64IM-NEXT: ld a5, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: ld t2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t2 +; RV64IM-NEXT: xor t2, t4, s6 +; RV64IM-NEXT: xor t0, t5, t0 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: ld a4, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: ld a5, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: xor a7, t0, t3 +; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; 
RV64IM-NEXT: ld a3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, a7, s3 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a1, a1, t6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i32 @llvm.clmul.i32(i32 %a, i32 %b) + ret i32 %res +} + +define i64 @clmul_i64(i64 %a, i64 %b) nounwind { +; RV32IM-LABEL: clmul_i64: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -368 +; RV32IM-NEXT: sw ra, 364(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 360(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 356(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 352(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 348(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 344(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 340(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 336(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 332(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 328(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 324(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 320(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 316(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mv t0, a1 +; RV32IM-NEXT: andi t3, a2, 4 +; RV32IM-NEXT: andi t5, a2, 2 +; RV32IM-NEXT: andi a4, a2, 1 +; RV32IM-NEXT: andi t6, a2, 8 +; RV32IM-NEXT: andi s0, a2, 16 +; RV32IM-NEXT: andi s1, a2, 32 +; RV32IM-NEXT: andi s9, a2, 64 +; RV32IM-NEXT: andi t1, a2, 128 +; RV32IM-NEXT: andi s2, a2, 256 +; RV32IM-NEXT: andi ra, a2, 512 +; RV32IM-NEXT: andi s11, a2, 1024 +; RV32IM-NEXT: andi s4, a3, 1 +; RV32IM-NEXT: mul a5, a1, t3 +; RV32IM-NEXT: mulhu a6, a0, t3 +; RV32IM-NEXT: mul a1, a1, t5 +; RV32IM-NEXT: mulhu a7, a0, t5 +; RV32IM-NEXT: mul t2, t0, t6 +; RV32IM-NEXT: mulhu t4, a0, t6 +; RV32IM-NEXT: mul s3, t0, s0 +; RV32IM-NEXT: mulhu s5, a0, s0 +; RV32IM-NEXT: mul s6, t0, s1 +; RV32IM-NEXT: mulhu s7, a0, s1 +; RV32IM-NEXT: sw s9, 296(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s8, t0, s9 +; RV32IM-NEXT: or a5, a6, a5 +; RV32IM-NEXT: mulhu s9, a0, s9 +; RV32IM-NEXT: 
or a6, a7, a1 +; RV32IM-NEXT: mul s10, t0, t1 +; RV32IM-NEXT: or a1, t4, t2 +; RV32IM-NEXT: mulhu t4, a0, t1 +; RV32IM-NEXT: or a7, s5, s3 +; RV32IM-NEXT: mul s3, t0, s2 +; RV32IM-NEXT: or t2, s7, s6 +; RV32IM-NEXT: mulhu s5, a0, s2 +; RV32IM-NEXT: or s6, s9, s8 +; RV32IM-NEXT: sw s6, 308(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw ra, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s6, t0, ra +; RV32IM-NEXT: or t4, t4, s10 +; RV32IM-NEXT: mulhu s7, a0, ra +; RV32IM-NEXT: or s3, s5, s3 +; RV32IM-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s5, t0, s11 +; RV32IM-NEXT: or s6, s7, s6 +; RV32IM-NEXT: sw s6, 304(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu s6, a0, s11 +; RV32IM-NEXT: or s5, s6, s5 +; RV32IM-NEXT: sw s5, 312(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi s5, a3, 2 +; RV32IM-NEXT: mul s4, a0, s4 +; RV32IM-NEXT: mul s5, a0, s5 +; RV32IM-NEXT: mul t5, a0, t5 +; RV32IM-NEXT: xor s9, s4, s5 +; RV32IM-NEXT: mul s4, a0, a4 +; RV32IM-NEXT: xor s6, s4, t5 +; RV32IM-NEXT: lui s4, 2 +; RV32IM-NEXT: mul t3, a0, t3 +; RV32IM-NEXT: mul t5, a0, t6 +; RV32IM-NEXT: xor s7, t3, t5 +; RV32IM-NEXT: lui s10, 4 +; RV32IM-NEXT: mul a4, t0, a4 +; RV32IM-NEXT: mul t3, a0, s0 +; RV32IM-NEXT: mul t5, a0, s1 +; RV32IM-NEXT: xor s0, t3, t5 +; RV32IM-NEXT: lui s5, 1 +; RV32IM-NEXT: and t6, a2, s5 +; RV32IM-NEXT: mul t1, a0, t1 +; RV32IM-NEXT: mul t3, a0, s2 +; RV32IM-NEXT: xor s1, t1, t3 +; RV32IM-NEXT: and t3, a2, s4 +; RV32IM-NEXT: xor a4, a4, a6 +; RV32IM-NEXT: sw a4, 276(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t1, a2, s10 +; RV32IM-NEXT: xor a1, a5, a1 +; RV32IM-NEXT: sw a1, 272(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw t6, 204(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, t6 +; RV32IM-NEXT: xor a1, a7, t2 +; RV32IM-NEXT: sw a1, 268(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a5, a0, t6 +; RV32IM-NEXT: xor a1, t4, s3 +; RV32IM-NEXT: sw a1, 260(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw t3, 196(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a6, t0, t3 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 288(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a4, a0, t3 +; RV32IM-NEXT: or a1, a4, a6 +; RV32IM-NEXT: sw a1, 292(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw t1, 200(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, t1 +; RV32IM-NEXT: mulhu a5, a0, t1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 256(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 8 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: sw a1, 188(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 248(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 16 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: lui s8, 16 +; RV32IM-NEXT: sw a1, 184(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 264(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 32 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: sw a1, 176(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 280(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui t2, 64 +; RV32IM-NEXT: and a1, a2, t2 +; RV32IM-NEXT: sw a1, 172(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 284(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui s3, 128 +; RV32IM-NEXT: and a1, a2, s3 +; RV32IM-NEXT: sw 
a1, 164(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 232(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 256 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: lui t5, 256 +; RV32IM-NEXT: sw a1, 160(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 220(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui t3, 512 +; RV32IM-NEXT: and a1, a2, t3 +; RV32IM-NEXT: sw a1, 156(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 236(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui t4, 1024 +; RV32IM-NEXT: and a1, a2, t4 +; RV32IM-NEXT: sw a1, 152(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 240(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui t6, 2048 +; RV32IM-NEXT: and a1, a2, t6 +; RV32IM-NEXT: sw a1, 148(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 244(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui s2, 4096 +; RV32IM-NEXT: and a1, a2, s2 +; RV32IM-NEXT: sw a1, 144(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 252(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 8192 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: sw a1, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 180(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 16384 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: lui s4, 16384 +; RV32IM-NEXT: sw a1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 168(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 32768 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: lui t1, 32768 +; RV32IM-NEXT: sw a1, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 192(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 65536 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: lui a7, 65536 +; RV32IM-NEXT: sw a1, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 208(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 131072 +; RV32IM-NEXT: and a1, a2, a1 +; RV32IM-NEXT: lui a6, 131072 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: or a4, a5, a4 +; RV32IM-NEXT: sw a4, 212(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a5, 262144 +; RV32IM-NEXT: and a1, a2, a5 +; RV32IM-NEXT: sw a1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, t0, a1 +; RV32IM-NEXT: mulhu ra, a0, a1 +; RV32IM-NEXT: or a1, ra, a4 +; RV32IM-NEXT: sw a1, 216(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui ra, 524288 +; RV32IM-NEXT: and s10, a2, ra +; RV32IM-NEXT: mul a1, t0, s10 +; RV32IM-NEXT: mulhu s11, a0, s10 +; RV32IM-NEXT: or a1, s11, a1 +; RV32IM-NEXT: sw a1, 224(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a3, 4 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: xor a1, s9, a1 +; RV32IM-NEXT: sw a1, 116(sp) # 4-byte Folded Spill 
+; RV32IM-NEXT: and a1, a3, s3 +; RV32IM-NEXT: and s11, a3, t5 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: mul s11, a0, s11 +; RV32IM-NEXT: xor a1, a1, s11 +; RV32IM-NEXT: sw a1, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a3, a5 +; RV32IM-NEXT: and a1, a3, ra +; RV32IM-NEXT: mul a5, a0, a5 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: xor a1, a5, a1 +; RV32IM-NEXT: sw a1, 228(sp) # 4-byte Folded Spill +; RV32IM-NEXT: xor a1, s6, s7 +; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 296(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: xor a1, s0, a1 +; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: xor a1, s1, a1 +; RV32IM-NEXT: sw a1, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: li a1, 1 +; RV32IM-NEXT: slli a1, a1, 11 +; RV32IM-NEXT: and s6, a3, s5 +; RV32IM-NEXT: lui a4, 2 +; RV32IM-NEXT: and s11, a3, a4 +; RV32IM-NEXT: lui a4, 4 +; RV32IM-NEXT: and s5, a3, a4 +; RV32IM-NEXT: lui a4, 8 +; RV32IM-NEXT: and s7, a3, a4 +; RV32IM-NEXT: and s8, a3, s8 +; RV32IM-NEXT: lui a4, 32 +; RV32IM-NEXT: and s9, a3, a4 +; RV32IM-NEXT: and t2, a3, t2 +; RV32IM-NEXT: and t3, a3, t3 +; RV32IM-NEXT: and t4, a3, t4 +; RV32IM-NEXT: and t5, a3, t6 +; RV32IM-NEXT: and t6, a3, s2 +; RV32IM-NEXT: lui s0, 8192 +; RV32IM-NEXT: and s0, a3, s0 +; RV32IM-NEXT: and s1, a3, s4 +; RV32IM-NEXT: and s2, a3, t1 +; RV32IM-NEXT: and s3, a3, a7 +; RV32IM-NEXT: and s4, a3, a6 +; RV32IM-NEXT: and t1, a2, a1 +; RV32IM-NEXT: and a1, a3, a1 +; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a3, 8 +; RV32IM-NEXT: andi ra, a3, 16 +; RV32IM-NEXT: andi a2, a3, 32 +; RV32IM-NEXT: andi a4, a3, 64 +; RV32IM-NEXT: andi a5, a3, 128 +; RV32IM-NEXT: andi a6, a3, 256 +; RV32IM-NEXT: andi a7, a3, 512 +; RV32IM-NEXT: andi a3, a3, 1024 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, ra +; RV32IM-NEXT: sw a1, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a2 +; RV32IM-NEXT: sw a1, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a4 +; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a5 +; RV32IM-NEXT: sw a1, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a6 +; RV32IM-NEXT: sw a1, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a7 +; RV32IM-NEXT: sw a1, 296(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul ra, a0, a3 +; RV32IM-NEXT: lw a1, 300(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s6 +; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s11 +; RV32IM-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s5 +; RV32IM-NEXT: sw a1, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s7 +; RV32IM-NEXT: sw a1, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s8 +; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s9 +; RV32IM-NEXT: sw a1, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, t2 +; RV32IM-NEXT: sw a1, 300(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a3, a0, t3 +; RV32IM-NEXT: mul a1, a0, t4 +; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, t5 +; RV32IM-NEXT: sw a1, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, t6 +; RV32IM-NEXT: sw a1, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s0 +; 
RV32IM-NEXT: sw a1, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s1 +; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s2 +; RV32IM-NEXT: sw a1, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s3 +; RV32IM-NEXT: sw a1, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s4 +; RV32IM-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 204(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, a0, a1 +; RV32IM-NEXT: lw a1, 196(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 196(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 200(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t3, a0, a1 +; RV32IM-NEXT: lw a1, 188(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a0, a1 +; RV32IM-NEXT: lw a1, 184(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t5, a0, a1 +; RV32IM-NEXT: lw a1, 176(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t2, a0, a1 +; RV32IM-NEXT: lw a1, 172(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 204(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 164(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a0, a1 +; RV32IM-NEXT: lw a1, 160(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a0, a1 +; RV32IM-NEXT: lw a1, 156(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s1, a0, a1 +; RV32IM-NEXT: lw a1, 152(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a0, a1 +; RV32IM-NEXT: lw a1, 148(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a0, a1 +; RV32IM-NEXT: lw a1, 144(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 200(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a0, a1 +; RV32IM-NEXT: lw a1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s4, a0, a1 +; RV32IM-NEXT: lw a1, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s5, a0, a1 +; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s6, a0, a1 +; RV32IM-NEXT: lw a1, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a0, a1 +; RV32IM-NEXT: lw a1, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a0, a1 +; RV32IM-NEXT: mul a6, a0, s10 +; RV32IM-NEXT: mul t0, t0, t1 +; RV32IM-NEXT: lw a1, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s8, a0, a1 +; RV32IM-NEXT: mulhu s9, a0, t1 +; RV32IM-NEXT: mul a4, a0, t1 +; RV32IM-NEXT: xor t1, t3, t4 +; RV32IM-NEXT: xor t3, t6, s0 +; RV32IM-NEXT: xor t4, s3, s4 +; RV32IM-NEXT: lw a0, 276(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 272(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t6, a0, a1 +; RV32IM-NEXT: lw a0, 308(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 268(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s0, s0, a0 +; RV32IM-NEXT: lw a0, 304(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 260(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s3, a1, a0 +; RV32IM-NEXT: or t0, s9, t0 +; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 248(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s4, a0, a1 +; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 220(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s9, a0, a1 +; RV32IM-NEXT: lw a0, 180(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 168(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s10, a0, a1 +; RV32IM-NEXT: lw a0, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a2, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a0, a2 +; RV32IM-NEXT: xor s8, ra, s8 +; RV32IM-NEXT: lw a0, 96(sp) # 4-byte Folded Reload +; 
RV32IM-NEXT: xor a3, a0, a3 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a0, a1 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, ra +; RV32IM-NEXT: xor a4, a4, s11 +; RV32IM-NEXT: xor t1, t1, t5 +; RV32IM-NEXT: xor t3, t3, s1 +; RV32IM-NEXT: xor t4, t4, s5 +; RV32IM-NEXT: xor t5, t6, s0 +; RV32IM-NEXT: lw t6, 312(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t6, s3, t6 +; RV32IM-NEXT: lw s0, 288(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t0, t0, s0 +; RV32IM-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s0, s4, s0 +; RV32IM-NEXT: lw s1, 236(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s1, s9, s1 +; RV32IM-NEXT: lw s3, 192(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s3, s10, s3 +; RV32IM-NEXT: lw s4, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, s4 +; RV32IM-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s4, s8, s4 +; RV32IM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, s5 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: lw a1, 196(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a4, a1 +; RV32IM-NEXT: xor a4, t1, t2 +; RV32IM-NEXT: xor t1, t3, s2 +; RV32IM-NEXT: xor t2, t4, s6 +; RV32IM-NEXT: xor t3, t5, t6 +; RV32IM-NEXT: lw t4, 292(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t0, t0, t4 +; RV32IM-NEXT: lw t4, 280(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t4, s0, t4 +; RV32IM-NEXT: lw t5, 240(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t5, s1, t5 +; RV32IM-NEXT: lw t6, 208(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t6, s3, t6 +; RV32IM-NEXT: lw s0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, s0 +; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s0, s4, s0 +; RV32IM-NEXT: lw s1, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, s1 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: lw a1, 204(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a4, a1 +; RV32IM-NEXT: xor a4, t1, a7 +; RV32IM-NEXT: xor a7, t2, s7 +; RV32IM-NEXT: xor t0, t3, t0 +; RV32IM-NEXT: lw t1, 284(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t4, t1 +; RV32IM-NEXT: lw t2, 244(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t2, t5, t2 +; RV32IM-NEXT: lw t3, 212(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t3, t6, t3 +; RV32IM-NEXT: lw t4, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: lw t4, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t4, s0, t4 +; RV32IM-NEXT: lw t5, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, t5 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: lw a1, 200(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a4, a1 +; RV32IM-NEXT: xor a4, a7, a5 +; RV32IM-NEXT: xor a5, t0, t1 +; RV32IM-NEXT: lw a7, 252(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a7, t2, a7 +; RV32IM-NEXT: lw t0, 216(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t0, t3, t0 +; RV32IM-NEXT: lw t1, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, t1 +; RV32IM-NEXT: lw t1, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t4, t1 +; RV32IM-NEXT: lw t2, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, t2 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: xor a4, a4, a6 +; RV32IM-NEXT: xor a1, a5, a7 +; RV32IM-NEXT: lw a5, 224(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, t0, a5 +; RV32IM-NEXT: lw a6, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a6 +; 
RV32IM-NEXT: lw a6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a6, t1, a6 +; RV32IM-NEXT: lw a7, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a7 +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: lw a5, 296(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: lw a5, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a6, a5 +; RV32IM-NEXT: lw a6, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a6 +; RV32IM-NEXT: xor a1, a1, a2 +; RV32IM-NEXT: lw a2, 300(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a5, a2 +; RV32IM-NEXT: lw a5, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: xor a1, a1, a2 +; RV32IM-NEXT: lw a2, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a1, a1, a2 +; RV32IM-NEXT: lw a2, 228(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a2 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: lw ra, 364(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 360(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 356(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 352(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 348(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 344(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 340(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 336(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 332(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 328(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 324(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 320(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 316(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 368 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmul_i64: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi t2, a1, 2 +; RV64IM-NEXT: andi t4, a1, 1 +; RV64IM-NEXT: andi a6, a1, 4 +; RV64IM-NEXT: andi t0, a1, 8 +; RV64IM-NEXT: andi a5, a1, 16 +; RV64IM-NEXT: andi a7, a1, 32 +; RV64IM-NEXT: andi a3, a1, 64 +; RV64IM-NEXT: andi t1, a1, 128 +; RV64IM-NEXT: andi t3, a1, 256 +; RV64IM-NEXT: andi a4, a1, 512 +; RV64IM-NEXT: li a2, 1 +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: lui t6, 2 +; RV64IM-NEXT: lui s0, 4 +; RV64IM-NEXT: lui s1, 8 +; RV64IM-NEXT: lui s2, 16 +; RV64IM-NEXT: lui s3, 32 +; RV64IM-NEXT: lui s4, 64 +; RV64IM-NEXT: lui s5, 128 +; RV64IM-NEXT: lui s6, 256 +; RV64IM-NEXT: lui s8, 512 +; RV64IM-NEXT: lui s9, 1024 +; RV64IM-NEXT: lui s10, 2048 +; RV64IM-NEXT: lui s11, 4096 +; RV64IM-NEXT: lui ra, 8192 +; RV64IM-NEXT: lui t5, 16384 +; RV64IM-NEXT: mul t2, a0, t2 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: xor t2, t4, t2 +; RV64IM-NEXT: lui t4, 32768 +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: mul t0, a0, t0 +; RV64IM-NEXT: xor a6, a6, t0 +; RV64IM-NEXT: lui t0, 65536 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: mul a7, a0, a7 +; RV64IM-NEXT: xor a5, a5, a7 +; 
RV64IM-NEXT: lui a7, 131072 +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: mul t3, a0, t3 +; RV64IM-NEXT: xor t1, t1, t3 +; RV64IM-NEXT: lui t3, 262144 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a6, t2, a6 +; RV64IM-NEXT: sd a6, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a6, a2, 11 +; RV64IM-NEXT: sd a6, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s7, a1, s7 +; RV64IM-NEXT: and a6, a1, t6 +; RV64IM-NEXT: sd a6, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: and s2, a1, s2 +; RV64IM-NEXT: and s3, a1, s3 +; RV64IM-NEXT: and a6, a1, s4 +; RV64IM-NEXT: sd a6, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, s5 +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: and s8, a1, s8 +; RV64IM-NEXT: and t6, a1, s9 +; RV64IM-NEXT: sd t6, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s10 +; RV64IM-NEXT: sd t6, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, s11 +; RV64IM-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: and t5, a1, t5 +; RV64IM-NEXT: and t4, a1, t4 +; RV64IM-NEXT: sd t4, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, t3 +; RV64IM-NEXT: sd a7, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: sd a3, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a2, 32 +; RV64IM-NEXT: xor a3, t1, a4 +; RV64IM-NEXT: sd a3, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, a2, 33 +; RV64IM-NEXT: mul a3, a0, s0 +; RV64IM-NEXT: mul a4, a0, s1 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s0, a2, 34 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: mul a4, a0, t2 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s1, a2, 35 +; RV64IM-NEXT: mul a3, a0, t6 +; RV64IM-NEXT: mul a4, a0, t5 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, a2, 36 +; RV64IM-NEXT: slli t6, a2, 37 +; RV64IM-NEXT: slli s5, a2, 38 +; RV64IM-NEXT: slli s6, a2, 39 +; RV64IM-NEXT: slli s9, a2, 40 +; RV64IM-NEXT: slli s10, a2, 41 +; RV64IM-NEXT: slli s11, a2, 42 +; RV64IM-NEXT: slli ra, a2, 43 +; RV64IM-NEXT: slli a3, a2, 44 +; RV64IM-NEXT: sd a3, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 45 +; RV64IM-NEXT: sd a3, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 46 +; RV64IM-NEXT: sd a3, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 47 +; RV64IM-NEXT: sd a3, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 48 +; RV64IM-NEXT: sd a3, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 49 +; RV64IM-NEXT: sd a3, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 50 +; RV64IM-NEXT: sd a3, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 51 +; RV64IM-NEXT: sd a3, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 52 +; RV64IM-NEXT: sd a3, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 53 +; RV64IM-NEXT: sd a3, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a2, 54 +; RV64IM-NEXT: sd a3, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, a2, 55 +; RV64IM-NEXT: slli t0, a2, 56 +; RV64IM-NEXT: slli a7, a2, 57 +; RV64IM-NEXT: slli a6, a2, 58 +; RV64IM-NEXT: slli a5, a2, 59 +; RV64IM-NEXT: 
slli a4, a2, 60 +; RV64IM-NEXT: slli a3, a2, 61 +; RV64IM-NEXT: slli a2, a2, 62 +; RV64IM-NEXT: ld t2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t3, a1, t2 +; RV64IM-NEXT: and t2, a1, t4 +; RV64IM-NEXT: sd t2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t4, a1, s4 +; RV64IM-NEXT: and s0, a1, s0 +; RV64IM-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, s1 +; RV64IM-NEXT: sd s1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, t5 +; RV64IM-NEXT: sd t2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s1, a1, t6 +; RV64IM-NEXT: and t2, a1, s5 +; RV64IM-NEXT: sd t2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t2, a1, s6 +; RV64IM-NEXT: sd t2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s4, a1, s9 +; RV64IM-NEXT: and s5, a1, s10 +; RV64IM-NEXT: and s6, a1, s11 +; RV64IM-NEXT: and t6, a1, ra +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, a1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, a1, t2 +; RV64IM-NEXT: ld t2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and ra, a1, t2 +; RV64IM-NEXT: ld t2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t2, a1, t2 +; RV64IM-NEXT: sd t2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, a1, t1 +; RV64IM-NEXT: and t0, a1, t0 +; RV64IM-NEXT: sd t0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a7, a1, a7 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a6, a1, a6 +; RV64IM-NEXT: and a5, a1, a5 +; RV64IM-NEXT: sd a5, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a4, a1, a4 +; RV64IM-NEXT: sd a4, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a1, a3 +; RV64IM-NEXT: sd a3, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a2, a1, a2 +; RV64IM-NEXT: sd a2, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, a1, 1024 +; RV64IM-NEXT: srliw a3, a1, 31 +; RV64IM-NEXT: srli a1, a1, 63 +; RV64IM-NEXT: mul s9, a0, a2 +; RV64IM-NEXT: slli a3, a3, 31 +; RV64IM-NEXT: slli a1, a1, 63 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul a2, a0, s3 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s0, a0, s8 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 8(sp) # 8-byte Folded 
Spill +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a2 +; RV64IM-NEXT: ld a2, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a5, a0, t3 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: mul t4, a0, t4 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s1 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s1, a0, a1 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul a1, a0, t6 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, s10 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s10, a0, s11 +; RV64IM-NEXT: mul s11, a0, ra +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t3, a0, a1 +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s3, a0, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: ld a4, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: ld a6, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a6 +; RV64IM-NEXT: ld t1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, t1 +; RV64IM-NEXT: ld t6, 16(sp) # 8-byte 
Folded Reload +; RV64IM-NEXT: mul t6, a0, t6 +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, s8 +; RV64IM-NEXT: ld s8, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s9, s8, s9 +; RV64IM-NEXT: xor a5, a5, s7 +; RV64IM-NEXT: ld s7, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, s7, s2 +; RV64IM-NEXT: ld s7, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s7, s0 +; RV64IM-NEXT: ld s7, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s7, t2 +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: xor t4, s1, s4 +; RV64IM-NEXT: xor s1, s10, s11 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: xor a0, a0, s9 +; RV64IM-NEXT: ld a2, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s2, a5 +; RV64IM-NEXT: ld s2, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, s2 +; RV64IM-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, s2 +; RV64IM-NEXT: xor a7, a7, t5 +; RV64IM-NEXT: xor t4, t4, s5 +; RV64IM-NEXT: xor t5, s1, ra +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a5, a2 +; RV64IM-NEXT: ld a3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s0, a3 +; RV64IM-NEXT: ld a5, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: ld t2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t2 +; RV64IM-NEXT: xor t2, t4, s6 +; RV64IM-NEXT: xor t0, t5, t0 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a5, a3 +; RV64IM-NEXT: ld a4, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: ld a5, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, t2, a5 +; RV64IM-NEXT: xor a7, t0, t3 +; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, a7, s3 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: ld a4, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a1, a1, t6 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: ld a3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: xor a0, a0, a2 +; RV64IM-NEXT: ld a2, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 
8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i64 @llvm.clmul.i64(i64 %a, i64 %b) + ret i64 %res +} + +define i4 @clmul_constfold_i4() nounwind { +; CHECK-LABEL: clmul_constfold_i4: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 2 +; CHECK-NEXT: ret + %res = call i4 @llvm.clmul.i4(i4 1, i4 2) + ret i4 %res +} + +define i16 @clmul_constfold_i16() nounwind { +; RV32IM-LABEL: clmul_constfold_i16: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a0, 699051 +; RV32IM-NEXT: addi a0, a0, -1366 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmul_constfold_i16: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lui a0, %hi(.LCPI6_0) +; RV64IM-NEXT: ld a0, %lo(.LCPI6_0)(a0) +; RV64IM-NEXT: ret + %res = call i16 @llvm.clmul.i16(i16 -2, i16 -1) + ret i16 %res +} + +define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { +; RV32IM-LABEL: clmulr_i4: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli a3, a0, 8 +; RV32IM-NEXT: lui s9, 16 +; RV32IM-NEXT: srli a4, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui a7, 61681 +; RV32IM-NEXT: lui ra, 209715 +; RV32IM-NEXT: lui a1, 349525 +; RV32IM-NEXT: li s0, 1 +; RV32IM-NEXT: lui t1, 1 +; RV32IM-NEXT: lui t2, 2 +; RV32IM-NEXT: lui t3, 4 +; RV32IM-NEXT: lui t4, 8 +; RV32IM-NEXT: lui t0, 32 +; RV32IM-NEXT: lui a6, 64 +; RV32IM-NEXT: lui a5, 128 +; RV32IM-NEXT: lui s1, 256 +; RV32IM-NEXT: lui t5, 512 +; RV32IM-NEXT: lui t6, 1024 +; RV32IM-NEXT: lui s4, 2048 +; RV32IM-NEXT: lui s2, 4096 +; RV32IM-NEXT: lui s3, 8192 +; RV32IM-NEXT: lui s7, 16384 +; RV32IM-NEXT: lui s5, 32768 +; RV32IM-NEXT: lui s6, 65536 +; RV32IM-NEXT: lui s11, 131072 +; RV32IM-NEXT: lui s8, 262144 +; RV32IM-NEXT: addi s10, s9, -256 +; RV32IM-NEXT: and a3, a3, s10 +; RV32IM-NEXT: or a3, a3, a4 +; RV32IM-NEXT: addi a7, a7, -241 +; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi a4, ra, 819 +; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi a1, a1, 1365 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: slli s0, s0, 11 +; RV32IM-NEXT: and a0, a0, s10 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a0, a3 +; RV32IM-NEXT: srli a2, a0, 4 +; RV32IM-NEXT: and a0, a0, a7 +; RV32IM-NEXT: and a2, a2, a7 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a0, 2 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a0, 1 +; RV32IM-NEXT: and a0, a0, a1 +; 
RV32IM-NEXT: and a2, a2, a1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a3, a2, a0 +; RV32IM-NEXT: andi a0, a3, 2 +; RV32IM-NEXT: andi a1, a3, 1 +; RV32IM-NEXT: and a4, a3, s0 +; RV32IM-NEXT: and a7, a3, t1 +; RV32IM-NEXT: and s0, a3, t2 +; RV32IM-NEXT: and ra, a3, t3 +; RV32IM-NEXT: and a2, a3, t4 +; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s9 +; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, t0 +; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a6, a3, a6 +; RV32IM-NEXT: and a5, a3, a5 +; RV32IM-NEXT: and s1, a3, s1 +; RV32IM-NEXT: sw s1, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, t5 +; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t6, a3, t6 +; RV32IM-NEXT: and a2, a3, s4 +; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s2, a3, s2 +; RV32IM-NEXT: and a2, a3, s3 +; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s7 +; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s5 +; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s6 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s11 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s8 +; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: and a2, a3, a2 +; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a3, a1 +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a0, a3, 4 +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a3, 8 +; RV32IM-NEXT: mul a0, a3, a1 +; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a2, a3, 16 +; RV32IM-NEXT: mul s9, a3, a2 +; RV32IM-NEXT: andi t0, a3, 32 +; RV32IM-NEXT: mul s6, a3, t0 +; RV32IM-NEXT: andi t1, a3, 64 +; RV32IM-NEXT: mul a0, a3, t1 +; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t2, a3, 128 +; RV32IM-NEXT: mul a0, a3, t2 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t2, a3, 256 +; RV32IM-NEXT: mul s1, a3, t2 +; RV32IM-NEXT: andi t3, a3, 512 +; RV32IM-NEXT: mul t5, a3, t3 +; RV32IM-NEXT: andi t4, a3, 1024 +; RV32IM-NEXT: mul s5, a3, t4 +; RV32IM-NEXT: mul s8, a3, a4 +; RV32IM-NEXT: mul a0, a3, a7 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t2, a3, s0 +; RV32IM-NEXT: mul a7, a3, ra +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a3, a0 +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s4, a3, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a3, a0 +; RV32IM-NEXT: mul a0, a3, a6 +; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a6, a3, a5 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a3, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a3, a0 +; RV32IM-NEXT: mul t4, a3, t6 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a3, a0 +; RV32IM-NEXT: mul a2, a3, s2 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a3, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a3, a0 +; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, 
a3, a0 +; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t3, a3, a0 +; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a3, a0 +; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a3, a0 +; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a3, a0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, s11, a0 +; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s11, s11, ra +; RV32IM-NEXT: xor s6, s9, s6 +; RV32IM-NEXT: xor t5, s1, t5 +; RV32IM-NEXT: xor a7, t2, a7 +; RV32IM-NEXT: xor a4, a6, a4 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: xor a0, a0, s11 +; RV32IM-NEXT: lw a2, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, s6, a2 +; RV32IM-NEXT: xor a6, t5, s5 +; RV32IM-NEXT: xor a7, a7, s0 +; RV32IM-NEXT: xor a4, a4, t1 +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: xor a0, a0, a2 +; RV32IM-NEXT: xor a2, a6, s8 +; RV32IM-NEXT: xor a5, a7, s4 +; RV32IM-NEXT: xor a4, a4, t4 +; RV32IM-NEXT: xor a1, a1, t0 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a5, a5, s7 +; RV32IM-NEXT: xor a4, a4, s3 +; RV32IM-NEXT: xor a1, a1, t3 +; RV32IM-NEXT: lw a6, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a1, a1, t6 +; RV32IM-NEXT: xor a2, a0, a2 +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: xor a1, a1, s2 +; RV32IM-NEXT: xor a2, a2, a4 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: and a3, a2, s10 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s10 +; RV32IM-NEXT: srli a1, a1, 24 +; RV32IM-NEXT: or a0, a0, a3 +; RV32IM-NEXT: or a1, a2, a1 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i4: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 
432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a2, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li a3, 255 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s3, 16 +; RV64IM-NEXT: srli s0, a0, 56 +; RV64IM-NEXT: srliw t2, a0, 24 +; RV64IM-NEXT: slli t0, a0, 56 +; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: lui t4, 209715 +; RV64IM-NEXT: lui t6, 349525 +; RV64IM-NEXT: li a7, 1 +; RV64IM-NEXT: lui s5, 2 +; RV64IM-NEXT: lui t1, 4 +; RV64IM-NEXT: lui a4, 128 +; RV64IM-NEXT: lui s7, 256 +; RV64IM-NEXT: lui s8, 4096 +; RV64IM-NEXT: lui s10, 8192 +; RV64IM-NEXT: lui a1, 4080 +; RV64IM-NEXT: and a2, a2, a1 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: addi s1, s3, -256 +; RV64IM-NEXT: and t5, a0, a1 +; RV64IM-NEXT: slli a1, t2, 32 +; RV64IM-NEXT: addi s9, t3, -241 +; RV64IM-NEXT: addi t4, t4, 819 +; RV64IM-NEXT: addi t2, t6, 1365 +; RV64IM-NEXT: slli t3, a7, 11 +; RV64IM-NEXT: slli s11, a7, 32 +; RV64IM-NEXT: slli ra, a7, 33 +; RV64IM-NEXT: slli t6, a7, 34 +; RV64IM-NEXT: slli s2, a7, 35 +; RV64IM-NEXT: slli s4, a7, 36 +; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a6, a3 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: slli a3, a7, 37 +; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a5, s1 +; RV64IM-NEXT: or a3, a3, s0 +; RV64IM-NEXT: slli a5, a7, 38 +; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, t5, 24 +; RV64IM-NEXT: and a0, a0, s1 +; RV64IM-NEXT: or a1, t5, a1 +; RV64IM-NEXT: slli a5, s9, 32 +; RV64IM-NEXT: add a5, s9, a5 +; RV64IM-NEXT: slli s0, t4, 32 +; RV64IM-NEXT: add t4, t4, s0 +; RV64IM-NEXT: slli s4, t2, 32 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: add t2, t2, s4 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a0, t0, a0 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t4 +; RV64IM-NEXT: and a1, a1, t4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t2 +; RV64IM-NEXT: and a1, a1, t2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or t0, a1, a0 +; RV64IM-NEXT: andi a0, t0, 2 +; RV64IM-NEXT: andi a1, t0, 1 +; RV64IM-NEXT: andi a2, t0, 4 +; RV64IM-NEXT: andi a3, t0, 8 +; RV64IM-NEXT: andi a5, t0, 16 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 32 +; RV64IM-NEXT: mul a1, t0, a2 +; RV64IM-NEXT: mul a2, t0, a3 +; RV64IM-NEXT: xor a1, 
a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, t0, 256 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a2, a0 +; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 512 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a7, 39 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 40 +; RV64IM-NEXT: and a1, t0, a4 +; RV64IM-NEXT: and a2, t0, s7 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 41 +; RV64IM-NEXT: and a2, t0, s8 +; RV64IM-NEXT: and a3, t0, s10 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 48 +; RV64IM-NEXT: and a3, t0, s11 +; RV64IM-NEXT: and a4, t0, ra +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: mul a4, t0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a7, 49 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 56 +; RV64IM-NEXT: and a1, t0, a2 +; RV64IM-NEXT: and a2, t0, a3 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 57 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 42 +; RV64IM-NEXT: slli ra, a7, 43 +; RV64IM-NEXT: slli a3, a7, 44 +; RV64IM-NEXT: slli a4, a7, 45 +; RV64IM-NEXT: slli t5, a7, 46 +; RV64IM-NEXT: slli s0, a7, 47 +; RV64IM-NEXT: slli s1, a7, 50 +; RV64IM-NEXT: slli a0, a7, 51 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 52 +; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 53 +; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 54 +; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 55 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 58 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 59 +; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 60 +; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 61 +; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a7, a7, 62 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, t3 +; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: and a0, t0, s7 +; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 8 +; RV64IM-NEXT: and a0, t0, s8 +; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s3 +; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded 
Spill +; RV64IM-NEXT: lui s6, 32 +; RV64IM-NEXT: and a0, t0, s6 +; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 64 +; RV64IM-NEXT: and a0, t0, s10 +; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 512 +; RV64IM-NEXT: and a0, t0, s11 +; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s4, 1024 +; RV64IM-NEXT: and a0, t0, s4 +; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s5, 2048 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 16384 +; RV64IM-NEXT: and a0, t0, s9 +; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a5, 32768 +; RV64IM-NEXT: and a5, t0, a5 +; RV64IM-NEXT: lui a6, 65536 +; RV64IM-NEXT: and a6, t0, a6 +; RV64IM-NEXT: lui t1, 131072 +; RV64IM-NEXT: and t1, t0, t1 +; RV64IM-NEXT: lui t2, 262144 +; RV64IM-NEXT: and t2, t0, t2 +; RV64IM-NEXT: and a0, t0, t6 +; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s2 +; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, t4 +; RV64IM-NEXT: and a7, t0, a2 +; RV64IM-NEXT: and ra, t0, ra +; RV64IM-NEXT: and t3, t0, a3 +; RV64IM-NEXT: and t4, t0, a4 +; RV64IM-NEXT: and t5, t0, t5 +; RV64IM-NEXT: and t6, t0, s0 +; RV64IM-NEXT: and s0, t0, s1 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, t0, a2 +; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, t0, a2 +; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, t0, a2 +; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, t0, a2 +; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, t0, a2 +; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, t0, a2 +; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, t0, a2 +; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, t0, a2 +; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, t0, a2 +; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, t0, a2 +; RV64IM-NEXT: andi s11, t0, 64 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 128 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 1024 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, t0, a2 +; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld 
a2, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, t0, a2 +; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, t0, a2 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, a6 +; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t1 +; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t2 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srliw t2, t0, 31 +; RV64IM-NEXT: slli t2, t2, 31 +; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a5, t0, a5 +; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, t0, a6 +; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, t0, a6 +; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a1 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a7, t0, a7 +; RV64IM-NEXT: mul ra, t0, ra +; RV64IM-NEXT: mul a6, t0, t3 +; RV64IM-NEXT: mul t4, t0, t4 +; RV64IM-NEXT: mul t5, t0, t5 +; RV64IM-NEXT: mul a0, t0, t6 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t6, t0, s0 +; RV64IM-NEXT: mul s0, t0, s1 +; RV64IM-NEXT: mul s1, t0, s2 +; RV64IM-NEXT: mul s2, t0, s3 +; RV64IM-NEXT: mul s3, t0, s4 +; RV64IM-NEXT: mul s4, t0, s5 +; RV64IM-NEXT: mul s5, t0, s6 +; RV64IM-NEXT: mul s6, t0, s7 +; RV64IM-NEXT: mul s7, t0, s8 +; RV64IM-NEXT: mul s8, t0, s9 +; RV64IM-NEXT: mul s9, t0, s10 +; RV64IM-NEXT: srli s10, t0, 63 +; RV64IM-NEXT: slli s10, s10, 63 +; RV64IM-NEXT: mul t2, t0, t2 +; RV64IM-NEXT: mul t0, t0, s10 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, a0, a1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s11, t3, s11 +; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, t3, a4 +; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, t3, a3 +; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, t3, a2 +; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, t3, a7 +; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t6, t3, t6 +; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, t3, s5 +; RV64IM-NEXT: xor a0, s10, a0 +; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, s11, t3 +; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 
88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a5, a7, ra +; RV64IM-NEXT: xor a7, t6, s0 +; RV64IM-NEXT: xor t6, s5, s6 +; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, t3 +; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s10, t3 +; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: xor a6, a7, s1 +; RV64IM-NEXT: xor a7, t6, s7 +; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s0, t1 +; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t3 +; RV64IM-NEXT: xor a5, a5, t4 +; RV64IM-NEXT: xor a6, a6, s2 +; RV64IM-NEXT: xor a7, a7, s8 +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, t5 +; RV64IM-NEXT: xor a6, a6, s3 +; RV64IM-NEXT: xor a7, a7, s9 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a4 +; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, a6, s4 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: xor a6, a7, t0 +; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a1, t0 +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: or a0, a0, a7 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a2, a1, a7 +; RV64IM-NEXT: xor a4, a1, a4 +; RV64IM-NEXT: srli a1, a1, 8 +; RV64IM-NEXT: slli a2, a2, 24 +; RV64IM-NEXT: xor a5, a4, a5 +; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, a1, a3 +; RV64IM-NEXT: srli a4, a4, 24 +; RV64IM-NEXT: srliw a3, a5, 24 +; RV64IM-NEXT: and a4, a4, a7 +; RV64IM-NEXT: srli a7, a5, 40 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: slli a3, a3, 32 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: and a4, a7, t0 +; RV64IM-NEXT: srli a5, a5, 56 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a4, a4, a5 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i4 @llvm.clmulr.i4(i4 %a, i4 %b) + ret i4 %res +} + +define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { +; RV32IM-LABEL: clmulr_i8: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli a3, a0, 8 +; RV32IM-NEXT: lui s9, 16 +; RV32IM-NEXT: srli a4, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui a7, 61681 +; RV32IM-NEXT: lui ra, 209715 +; RV32IM-NEXT: lui a1, 349525 +; RV32IM-NEXT: li s0, 1 +; RV32IM-NEXT: lui t1, 1 +; RV32IM-NEXT: lui t2, 2 +; RV32IM-NEXT: lui t3, 4 +; RV32IM-NEXT: lui t4, 8 +; RV32IM-NEXT: lui t0, 32 +; RV32IM-NEXT: lui a6, 64 +; RV32IM-NEXT: lui a5, 128 +; RV32IM-NEXT: lui s1, 256 +; RV32IM-NEXT: lui t5, 512 +; RV32IM-NEXT: lui t6, 1024 +; RV32IM-NEXT: lui s4, 2048 +; RV32IM-NEXT: lui s2, 4096 +; RV32IM-NEXT: lui s3, 8192 +; RV32IM-NEXT: lui s7, 16384 +; RV32IM-NEXT: lui s5, 32768 +; RV32IM-NEXT: lui s6, 65536 +; RV32IM-NEXT: lui s11, 131072 +; RV32IM-NEXT: lui s8, 262144 +; RV32IM-NEXT: addi s10, s9, -256 +; RV32IM-NEXT: and a3, a3, s10 +; RV32IM-NEXT: or a3, a3, a4 +; RV32IM-NEXT: addi a7, a7, -241 +; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi a4, ra, 819 +; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi a1, a1, 1365 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: slli s0, s0, 11 +; RV32IM-NEXT: and a0, a0, s10 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a0, a3 +; RV32IM-NEXT: srli a2, a0, 4 +; RV32IM-NEXT: and a0, a0, a7 +; RV32IM-NEXT: and a2, a2, a7 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a0, 2 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a0, 1 +; RV32IM-NEXT: and a0, a0, a1 +; RV32IM-NEXT: and a2, a2, a1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a3, a2, a0 +; RV32IM-NEXT: andi a0, a3, 2 +; RV32IM-NEXT: andi a1, a3, 1 +; RV32IM-NEXT: and a4, a3, s0 +; RV32IM-NEXT: and a7, a3, t1 +; RV32IM-NEXT: and s0, a3, t2 +; RV32IM-NEXT: and ra, a3, t3 +; RV32IM-NEXT: and a2, a3, t4 +; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s9 +; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, t0 +; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a6, a3, a6 
+; RV32IM-NEXT: and a5, a3, a5 +; RV32IM-NEXT: and s1, a3, s1 +; RV32IM-NEXT: sw s1, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, t5 +; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t6, a3, t6 +; RV32IM-NEXT: and a2, a3, s4 +; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s2, a3, s2 +; RV32IM-NEXT: and a2, a3, s3 +; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s7 +; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s5 +; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s6 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s11 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s8 +; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: and a2, a3, a2 +; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a3, a1 +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a0, a3, 4 +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a3, 8 +; RV32IM-NEXT: mul a0, a3, a1 +; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a2, a3, 16 +; RV32IM-NEXT: mul s9, a3, a2 +; RV32IM-NEXT: andi t0, a3, 32 +; RV32IM-NEXT: mul s6, a3, t0 +; RV32IM-NEXT: andi t1, a3, 64 +; RV32IM-NEXT: mul a0, a3, t1 +; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t2, a3, 128 +; RV32IM-NEXT: mul a0, a3, t2 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t2, a3, 256 +; RV32IM-NEXT: mul s1, a3, t2 +; RV32IM-NEXT: andi t3, a3, 512 +; RV32IM-NEXT: mul t5, a3, t3 +; RV32IM-NEXT: andi t4, a3, 1024 +; RV32IM-NEXT: mul s5, a3, t4 +; RV32IM-NEXT: mul s8, a3, a4 +; RV32IM-NEXT: mul a0, a3, a7 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t2, a3, s0 +; RV32IM-NEXT: mul a7, a3, ra +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a3, a0 +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s4, a3, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a3, a0 +; RV32IM-NEXT: mul a0, a3, a6 +; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a6, a3, a5 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a3, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a3, a0 +; RV32IM-NEXT: mul t4, a3, t6 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a3, a0 +; RV32IM-NEXT: mul a2, a3, s2 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a3, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a3, a0 +; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a3, a0 +; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t3, a3, a0 +; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a3, a0 +; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a3, a0 +; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a3, a0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, s11, a0 +; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw 
ra, 0(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s11, s11, ra +; RV32IM-NEXT: xor s6, s9, s6 +; RV32IM-NEXT: xor t5, s1, t5 +; RV32IM-NEXT: xor a7, t2, a7 +; RV32IM-NEXT: xor a4, a6, a4 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: xor a0, a0, s11 +; RV32IM-NEXT: lw a2, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, s6, a2 +; RV32IM-NEXT: xor a6, t5, s5 +; RV32IM-NEXT: xor a7, a7, s0 +; RV32IM-NEXT: xor a4, a4, t1 +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: xor a0, a0, a2 +; RV32IM-NEXT: xor a2, a6, s8 +; RV32IM-NEXT: xor a5, a7, s4 +; RV32IM-NEXT: xor a4, a4, t4 +; RV32IM-NEXT: xor a1, a1, t0 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a5, a5, s7 +; RV32IM-NEXT: xor a4, a4, s3 +; RV32IM-NEXT: xor a1, a1, t3 +; RV32IM-NEXT: lw a6, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a1, a1, t6 +; RV32IM-NEXT: xor a2, a0, a2 +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: xor a1, a1, s2 +; RV32IM-NEXT: xor a2, a2, a4 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: and a3, a2, s10 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s10 +; RV32IM-NEXT: srli a1, a1, 24 +; RV32IM-NEXT: or a0, a0, a3 +; RV32IM-NEXT: or a1, a2, a1 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i8: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill 
+; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a2, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li a3, 255 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s3, 16 +; RV64IM-NEXT: srli s0, a0, 56 +; RV64IM-NEXT: srliw t2, a0, 24 +; RV64IM-NEXT: slli t0, a0, 56 +; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: lui t4, 209715 +; RV64IM-NEXT: lui t6, 349525 +; RV64IM-NEXT: li a7, 1 +; RV64IM-NEXT: lui s5, 2 +; RV64IM-NEXT: lui t1, 4 +; RV64IM-NEXT: lui a4, 128 +; RV64IM-NEXT: lui s7, 256 +; RV64IM-NEXT: lui s8, 4096 +; RV64IM-NEXT: lui s10, 8192 +; RV64IM-NEXT: lui a1, 4080 +; RV64IM-NEXT: and a2, a2, a1 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: addi s1, s3, -256 +; RV64IM-NEXT: and t5, a0, a1 +; RV64IM-NEXT: slli a1, t2, 32 +; RV64IM-NEXT: addi s9, t3, -241 +; RV64IM-NEXT: addi t4, t4, 819 +; RV64IM-NEXT: addi t2, t6, 1365 +; RV64IM-NEXT: slli t3, a7, 11 +; RV64IM-NEXT: slli s11, a7, 32 +; RV64IM-NEXT: slli ra, a7, 33 +; RV64IM-NEXT: slli t6, a7, 34 +; RV64IM-NEXT: slli s2, a7, 35 +; RV64IM-NEXT: slli s4, a7, 36 +; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a6, a3 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: slli a3, a7, 37 +; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a5, s1 +; RV64IM-NEXT: or a3, a3, s0 +; RV64IM-NEXT: slli a5, a7, 38 +; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, t5, 24 +; RV64IM-NEXT: and a0, a0, s1 +; RV64IM-NEXT: or a1, t5, a1 +; RV64IM-NEXT: slli a5, s9, 32 +; RV64IM-NEXT: add a5, s9, a5 +; RV64IM-NEXT: slli s0, t4, 32 +; RV64IM-NEXT: add t4, t4, s0 +; RV64IM-NEXT: slli s4, t2, 32 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: add t2, t2, s4 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a0, t0, a0 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t4 +; RV64IM-NEXT: and a1, a1, t4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t2 +; RV64IM-NEXT: and a1, a1, t2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or t0, a1, a0 +; RV64IM-NEXT: andi a0, t0, 2 +; RV64IM-NEXT: andi a1, t0, 1 +; RV64IM-NEXT: andi a2, t0, 4 +; RV64IM-NEXT: andi a3, t0, 8 +; RV64IM-NEXT: andi a5, t0, 16 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 32 +; RV64IM-NEXT: mul a1, t0, a2 +; RV64IM-NEXT: mul a2, t0, a3 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, t0, 256 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a2, a0 +; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 512 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a7, 39 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: 
mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 40 +; RV64IM-NEXT: and a1, t0, a4 +; RV64IM-NEXT: and a2, t0, s7 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 41 +; RV64IM-NEXT: and a2, t0, s8 +; RV64IM-NEXT: and a3, t0, s10 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 48 +; RV64IM-NEXT: and a3, t0, s11 +; RV64IM-NEXT: and a4, t0, ra +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: mul a4, t0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a7, 49 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 56 +; RV64IM-NEXT: and a1, t0, a2 +; RV64IM-NEXT: and a2, t0, a3 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 57 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 42 +; RV64IM-NEXT: slli ra, a7, 43 +; RV64IM-NEXT: slli a3, a7, 44 +; RV64IM-NEXT: slli a4, a7, 45 +; RV64IM-NEXT: slli t5, a7, 46 +; RV64IM-NEXT: slli s0, a7, 47 +; RV64IM-NEXT: slli s1, a7, 50 +; RV64IM-NEXT: slli a0, a7, 51 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 52 +; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 53 +; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 54 +; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 55 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 58 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 59 +; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 60 +; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 61 +; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a7, a7, 62 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, t3 +; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: and a0, t0, s7 +; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 8 +; RV64IM-NEXT: and a0, t0, s8 +; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s3 +; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s6, 32 +; RV64IM-NEXT: and a0, t0, s6 +; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 64 +; RV64IM-NEXT: and a0, t0, s10 +; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 512 +; RV64IM-NEXT: and a0, t0, s11 +; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s4, 1024 +; RV64IM-NEXT: and a0, t0, s4 +; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s5, 2048 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded 
Spill +; RV64IM-NEXT: lui s9, 16384 +; RV64IM-NEXT: and a0, t0, s9 +; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a5, 32768 +; RV64IM-NEXT: and a5, t0, a5 +; RV64IM-NEXT: lui a6, 65536 +; RV64IM-NEXT: and a6, t0, a6 +; RV64IM-NEXT: lui t1, 131072 +; RV64IM-NEXT: and t1, t0, t1 +; RV64IM-NEXT: lui t2, 262144 +; RV64IM-NEXT: and t2, t0, t2 +; RV64IM-NEXT: and a0, t0, t6 +; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s2 +; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, t4 +; RV64IM-NEXT: and a7, t0, a2 +; RV64IM-NEXT: and ra, t0, ra +; RV64IM-NEXT: and t3, t0, a3 +; RV64IM-NEXT: and t4, t0, a4 +; RV64IM-NEXT: and t5, t0, t5 +; RV64IM-NEXT: and t6, t0, s0 +; RV64IM-NEXT: and s0, t0, s1 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, t0, a2 +; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, t0, a2 +; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, t0, a2 +; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, t0, a2 +; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, t0, a2 +; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, t0, a2 +; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, t0, a2 +; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, t0, a2 +; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, t0, a2 +; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, t0, a2 +; RV64IM-NEXT: andi s11, t0, 64 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 128 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 1024 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, t0, a2 +; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, t0, a2 +; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, t0, a2 
+; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, a6 +; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t1 +; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t2 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srliw t2, t0, 31 +; RV64IM-NEXT: slli t2, t2, 31 +; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a5, t0, a5 +; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, t0, a6 +; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, t0, a6 +; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a1 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a7, t0, a7 +; RV64IM-NEXT: mul ra, t0, ra +; RV64IM-NEXT: mul a6, t0, t3 +; RV64IM-NEXT: mul t4, t0, t4 +; RV64IM-NEXT: mul t5, t0, t5 +; RV64IM-NEXT: mul a0, t0, t6 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t6, t0, s0 +; RV64IM-NEXT: mul s0, t0, s1 +; RV64IM-NEXT: mul s1, t0, s2 +; RV64IM-NEXT: mul s2, t0, s3 +; RV64IM-NEXT: mul s3, t0, s4 +; RV64IM-NEXT: mul s4, t0, s5 +; RV64IM-NEXT: mul s5, t0, s6 +; RV64IM-NEXT: mul s6, t0, s7 +; RV64IM-NEXT: mul s7, t0, s8 +; RV64IM-NEXT: mul s8, t0, s9 +; RV64IM-NEXT: mul s9, t0, s10 +; RV64IM-NEXT: srli s10, t0, 63 +; RV64IM-NEXT: slli s10, s10, 63 +; RV64IM-NEXT: mul t2, t0, t2 +; RV64IM-NEXT: mul t0, t0, s10 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, a0, a1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s11, t3, s11 +; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, t3, a4 +; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, t3, a3 +; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, t3, a2 +; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, t3, a7 +; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t6, t3, t6 +; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, t3, s5 +; RV64IM-NEXT: xor a0, s10, a0 +; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, s11, t3 +; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a5, a7, ra +; RV64IM-NEXT: xor a7, t6, s0 +; RV64IM-NEXT: xor t6, s5, s6 +; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, t3 +; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s10, t3 +; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 128(sp) # 
8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: xor a6, a7, s1 +; RV64IM-NEXT: xor a7, t6, s7 +; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s0, t1 +; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t3 +; RV64IM-NEXT: xor a5, a5, t4 +; RV64IM-NEXT: xor a6, a6, s2 +; RV64IM-NEXT: xor a7, a7, s8 +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, t5 +; RV64IM-NEXT: xor a6, a6, s3 +; RV64IM-NEXT: xor a7, a7, s9 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a4 +; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, a6, s4 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: xor a6, a7, t0 +; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a1, t0 +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: or a0, a0, a7 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a2, a1, a7 +; RV64IM-NEXT: xor a4, a1, a4 +; RV64IM-NEXT: srli a1, a1, 8 +; RV64IM-NEXT: slli a2, a2, 24 +; RV64IM-NEXT: xor a5, a4, a5 +; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, a1, a3 +; RV64IM-NEXT: srli a4, a4, 24 +; RV64IM-NEXT: srliw a3, a5, 24 +; RV64IM-NEXT: and a4, a4, a7 +; RV64IM-NEXT: srli a7, a5, 40 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: slli a3, a3, 32 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: and a4, a7, t0 +; RV64IM-NEXT: srli a5, a5, 56 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a4, a4, a5 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i8 @llvm.clmulr.i8(i8 %a, i8 %b) + ret i8 %res +} + +define i16 
@clmulr_i16(i16 %a, i16 %b) nounwind { +; RV32IM-LABEL: clmulr_i16: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli a3, a0, 8 +; RV32IM-NEXT: lui s9, 16 +; RV32IM-NEXT: srli a4, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui a7, 61681 +; RV32IM-NEXT: lui ra, 209715 +; RV32IM-NEXT: lui a1, 349525 +; RV32IM-NEXT: li s0, 1 +; RV32IM-NEXT: lui t1, 1 +; RV32IM-NEXT: lui t2, 2 +; RV32IM-NEXT: lui t3, 4 +; RV32IM-NEXT: lui t4, 8 +; RV32IM-NEXT: lui t0, 32 +; RV32IM-NEXT: lui a6, 64 +; RV32IM-NEXT: lui a5, 128 +; RV32IM-NEXT: lui s1, 256 +; RV32IM-NEXT: lui t5, 512 +; RV32IM-NEXT: lui t6, 1024 +; RV32IM-NEXT: lui s4, 2048 +; RV32IM-NEXT: lui s2, 4096 +; RV32IM-NEXT: lui s3, 8192 +; RV32IM-NEXT: lui s7, 16384 +; RV32IM-NEXT: lui s5, 32768 +; RV32IM-NEXT: lui s6, 65536 +; RV32IM-NEXT: lui s11, 131072 +; RV32IM-NEXT: lui s8, 262144 +; RV32IM-NEXT: addi s10, s9, -256 +; RV32IM-NEXT: and a3, a3, s10 +; RV32IM-NEXT: or a3, a3, a4 +; RV32IM-NEXT: addi a7, a7, -241 +; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi a4, ra, 819 +; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi a1, a1, 1365 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: slli s0, s0, 11 +; RV32IM-NEXT: and a0, a0, s10 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a0, a3 +; RV32IM-NEXT: srli a2, a0, 4 +; RV32IM-NEXT: and a0, a0, a7 +; RV32IM-NEXT: and a2, a2, a7 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a0, 2 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a0, 1 +; RV32IM-NEXT: and a0, a0, a1 +; RV32IM-NEXT: and a2, a2, a1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a3, a2, a0 +; RV32IM-NEXT: andi a0, a3, 2 +; RV32IM-NEXT: andi a1, a3, 1 +; RV32IM-NEXT: and a4, a3, s0 +; RV32IM-NEXT: and a7, a3, t1 +; RV32IM-NEXT: and s0, a3, t2 +; RV32IM-NEXT: and ra, a3, t3 +; RV32IM-NEXT: and a2, a3, t4 +; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s9 +; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, t0 +; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a6, a3, a6 +; RV32IM-NEXT: and a5, a3, a5 +; RV32IM-NEXT: and s1, a3, s1 +; RV32IM-NEXT: sw s1, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, t5 +; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t6, a3, t6 +; RV32IM-NEXT: and a2, a3, s4 +; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s2, a3, s2 +; RV32IM-NEXT: and a2, a3, s3 +; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s7 +; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s5 +; RV32IM-NEXT: 
sw a2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s6 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s11 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s8 +; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: and a2, a3, a2 +; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a3, a1 +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a0, a3, 4 +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a3, 8 +; RV32IM-NEXT: mul a0, a3, a1 +; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a2, a3, 16 +; RV32IM-NEXT: mul s9, a3, a2 +; RV32IM-NEXT: andi t0, a3, 32 +; RV32IM-NEXT: mul s6, a3, t0 +; RV32IM-NEXT: andi t1, a3, 64 +; RV32IM-NEXT: mul a0, a3, t1 +; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t2, a3, 128 +; RV32IM-NEXT: mul a0, a3, t2 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t2, a3, 256 +; RV32IM-NEXT: mul s1, a3, t2 +; RV32IM-NEXT: andi t3, a3, 512 +; RV32IM-NEXT: mul t5, a3, t3 +; RV32IM-NEXT: andi t4, a3, 1024 +; RV32IM-NEXT: mul s5, a3, t4 +; RV32IM-NEXT: mul s8, a3, a4 +; RV32IM-NEXT: mul a0, a3, a7 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t2, a3, s0 +; RV32IM-NEXT: mul a7, a3, ra +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a3, a0 +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s4, a3, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a3, a0 +; RV32IM-NEXT: mul a0, a3, a6 +; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a6, a3, a5 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a3, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a3, a0 +; RV32IM-NEXT: mul t4, a3, t6 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a3, a0 +; RV32IM-NEXT: mul a2, a3, s2 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a3, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a3, a0 +; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a3, a0 +; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t3, a3, a0 +; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a3, a0 +; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a3, a0 +; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a3, a0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, s11, a0 +; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s11, s11, ra +; RV32IM-NEXT: xor s6, s9, s6 +; RV32IM-NEXT: xor t5, s1, t5 +; RV32IM-NEXT: xor a7, t2, a7 +; RV32IM-NEXT: xor a4, a6, a4 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: xor a0, a0, s11 +; RV32IM-NEXT: lw a2, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, s6, a2 +; RV32IM-NEXT: xor a6, t5, s5 +; RV32IM-NEXT: xor a7, a7, s0 +; RV32IM-NEXT: xor a4, a4, t1 +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: xor a0, a0, a2 +; RV32IM-NEXT: xor a2, a6, s8 +; RV32IM-NEXT: xor a5, a7, s4 +; 
RV32IM-NEXT: xor a4, a4, t4 +; RV32IM-NEXT: xor a1, a1, t0 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a5, a5, s7 +; RV32IM-NEXT: xor a4, a4, s3 +; RV32IM-NEXT: xor a1, a1, t3 +; RV32IM-NEXT: lw a6, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a1, a1, t6 +; RV32IM-NEXT: xor a2, a0, a2 +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: xor a1, a1, s2 +; RV32IM-NEXT: xor a2, a2, a4 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: and a3, a2, s10 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s10 +; RV32IM-NEXT: srli a1, a1, 24 +; RV32IM-NEXT: or a0, a0, a3 +; RV32IM-NEXT: or a1, a2, a1 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i16: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a2, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li a3, 255 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s3, 16 +; RV64IM-NEXT: srli s0, a0, 56 +; RV64IM-NEXT: srliw t2, a0, 24 +; RV64IM-NEXT: slli t0, a0, 56 +; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: lui t4, 209715 +; RV64IM-NEXT: lui t6, 349525 +; RV64IM-NEXT: li a7, 1 +; RV64IM-NEXT: lui s5, 2 +; RV64IM-NEXT: lui t1, 4 +; RV64IM-NEXT: lui a4, 128 +; RV64IM-NEXT: lui s7, 256 +; RV64IM-NEXT: lui s8, 4096 
+; RV64IM-NEXT: lui s10, 8192 +; RV64IM-NEXT: lui a1, 4080 +; RV64IM-NEXT: and a2, a2, a1 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: addi s1, s3, -256 +; RV64IM-NEXT: and t5, a0, a1 +; RV64IM-NEXT: slli a1, t2, 32 +; RV64IM-NEXT: addi s9, t3, -241 +; RV64IM-NEXT: addi t4, t4, 819 +; RV64IM-NEXT: addi t2, t6, 1365 +; RV64IM-NEXT: slli t3, a7, 11 +; RV64IM-NEXT: slli s11, a7, 32 +; RV64IM-NEXT: slli ra, a7, 33 +; RV64IM-NEXT: slli t6, a7, 34 +; RV64IM-NEXT: slli s2, a7, 35 +; RV64IM-NEXT: slli s4, a7, 36 +; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a6, a3 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: slli a3, a7, 37 +; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a5, s1 +; RV64IM-NEXT: or a3, a3, s0 +; RV64IM-NEXT: slli a5, a7, 38 +; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, t5, 24 +; RV64IM-NEXT: and a0, a0, s1 +; RV64IM-NEXT: or a1, t5, a1 +; RV64IM-NEXT: slli a5, s9, 32 +; RV64IM-NEXT: add a5, s9, a5 +; RV64IM-NEXT: slli s0, t4, 32 +; RV64IM-NEXT: add t4, t4, s0 +; RV64IM-NEXT: slli s4, t2, 32 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: add t2, t2, s4 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a0, t0, a0 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t4 +; RV64IM-NEXT: and a1, a1, t4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t2 +; RV64IM-NEXT: and a1, a1, t2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or t0, a1, a0 +; RV64IM-NEXT: andi a0, t0, 2 +; RV64IM-NEXT: andi a1, t0, 1 +; RV64IM-NEXT: andi a2, t0, 4 +; RV64IM-NEXT: andi a3, t0, 8 +; RV64IM-NEXT: andi a5, t0, 16 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 32 +; RV64IM-NEXT: mul a1, t0, a2 +; RV64IM-NEXT: mul a2, t0, a3 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, t0, 256 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a2, a0 +; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 512 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a7, 39 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 40 +; RV64IM-NEXT: and a1, t0, a4 +; RV64IM-NEXT: and a2, t0, s7 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 41 +; RV64IM-NEXT: and a2, t0, s8 +; RV64IM-NEXT: and a3, t0, s10 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 
224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 48 +; RV64IM-NEXT: and a3, t0, s11 +; RV64IM-NEXT: and a4, t0, ra +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: mul a4, t0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a7, 49 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 56 +; RV64IM-NEXT: and a1, t0, a2 +; RV64IM-NEXT: and a2, t0, a3 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 57 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 42 +; RV64IM-NEXT: slli ra, a7, 43 +; RV64IM-NEXT: slli a3, a7, 44 +; RV64IM-NEXT: slli a4, a7, 45 +; RV64IM-NEXT: slli t5, a7, 46 +; RV64IM-NEXT: slli s0, a7, 47 +; RV64IM-NEXT: slli s1, a7, 50 +; RV64IM-NEXT: slli a0, a7, 51 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 52 +; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 53 +; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 54 +; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 55 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 58 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 59 +; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 60 +; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 61 +; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a7, a7, 62 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, t3 +; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: and a0, t0, s7 +; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 8 +; RV64IM-NEXT: and a0, t0, s8 +; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s3 +; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s6, 32 +; RV64IM-NEXT: and a0, t0, s6 +; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 64 +; RV64IM-NEXT: and a0, t0, s10 +; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 512 +; RV64IM-NEXT: and a0, t0, s11 +; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s4, 1024 +; RV64IM-NEXT: and a0, t0, s4 +; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s5, 2048 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 16384 +; RV64IM-NEXT: and a0, t0, s9 +; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a5, 32768 +; RV64IM-NEXT: and a5, t0, a5 +; RV64IM-NEXT: lui a6, 65536 +; RV64IM-NEXT: and a6, t0, a6 +; RV64IM-NEXT: lui t1, 131072 +; RV64IM-NEXT: and t1, t0, t1 +; RV64IM-NEXT: lui t2, 262144 +; RV64IM-NEXT: and t2, t0, t2 +; RV64IM-NEXT: and a0, t0, t6 +; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s2 +; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld 
a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, t4 +; RV64IM-NEXT: and a7, t0, a2 +; RV64IM-NEXT: and ra, t0, ra +; RV64IM-NEXT: and t3, t0, a3 +; RV64IM-NEXT: and t4, t0, a4 +; RV64IM-NEXT: and t5, t0, t5 +; RV64IM-NEXT: and t6, t0, s0 +; RV64IM-NEXT: and s0, t0, s1 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, t0, a2 +; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, t0, a2 +; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, t0, a2 +; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, t0, a2 +; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, t0, a2 +; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, t0, a2 +; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, t0, a2 +; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, t0, a2 +; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, t0, a2 +; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, t0, a2 +; RV64IM-NEXT: andi s11, t0, 64 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 128 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 1024 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, t0, a2 +; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, t0, a2 +; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, t0, a2 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, a6 +; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t1 +; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t2 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srliw t2, t0, 31 +; RV64IM-NEXT: slli t2, t2, 31 +; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: mul a5, t0, a5 +; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, t0, a6 +; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, t0, a6 +; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a1 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a7, t0, a7 +; RV64IM-NEXT: mul ra, t0, ra +; RV64IM-NEXT: mul a6, t0, t3 +; RV64IM-NEXT: mul t4, t0, t4 +; RV64IM-NEXT: mul t5, t0, t5 +; RV64IM-NEXT: mul a0, t0, t6 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t6, t0, s0 +; RV64IM-NEXT: mul s0, t0, s1 +; RV64IM-NEXT: mul s1, t0, s2 +; RV64IM-NEXT: mul s2, t0, s3 +; RV64IM-NEXT: mul s3, t0, s4 +; RV64IM-NEXT: mul s4, t0, s5 +; RV64IM-NEXT: mul s5, t0, s6 +; RV64IM-NEXT: mul s6, t0, s7 +; RV64IM-NEXT: mul s7, t0, s8 +; RV64IM-NEXT: mul s8, t0, s9 +; RV64IM-NEXT: mul s9, t0, s10 +; RV64IM-NEXT: srli s10, t0, 63 +; RV64IM-NEXT: slli s10, s10, 63 +; RV64IM-NEXT: mul t2, t0, t2 +; RV64IM-NEXT: mul t0, t0, s10 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, a0, a1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s11, t3, s11 +; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, t3, a4 +; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, t3, a3 +; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, t3, a2 +; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, t3, a7 +; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t6, t3, t6 +; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, t3, s5 +; RV64IM-NEXT: xor a0, s10, a0 +; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, s11, t3 +; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a5, a7, ra +; RV64IM-NEXT: xor a7, t6, s0 +; RV64IM-NEXT: xor t6, s5, s6 +; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, t3 +; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s10, t3 +; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: xor a6, a7, s1 +; RV64IM-NEXT: xor a7, t6, s7 +; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s0, t1 +; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t3 +; RV64IM-NEXT: xor a5, a5, t4 +; RV64IM-NEXT: xor a6, a6, s2 +; RV64IM-NEXT: xor a7, a7, s8 +; RV64IM-NEXT: xor 
a1, a0, a1 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, t5 +; RV64IM-NEXT: xor a6, a6, s3 +; RV64IM-NEXT: xor a7, a7, s9 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a4 +; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, a6, s4 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: xor a6, a7, t0 +; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a1, t0 +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: or a0, a0, a7 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a2, a1, a7 +; RV64IM-NEXT: xor a4, a1, a4 +; RV64IM-NEXT: srli a1, a1, 8 +; RV64IM-NEXT: slli a2, a2, 24 +; RV64IM-NEXT: xor a5, a4, a5 +; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, a1, a3 +; RV64IM-NEXT: srli a4, a4, 24 +; RV64IM-NEXT: srliw a3, a5, 24 +; RV64IM-NEXT: and a4, a4, a7 +; RV64IM-NEXT: srli a7, a5, 40 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: slli a3, a3, 32 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: and a4, a7, t0 +; RV64IM-NEXT: srli a5, a5, 56 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a4, a4, a5 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i16 @llvm.clmulr.i16(i16 %a, i16 %b) + ret i16 %res +} + +define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { +; RV32IM-LABEL: clmulr_i32: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill 
+; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli a3, a0, 8 +; RV32IM-NEXT: lui s9, 16 +; RV32IM-NEXT: srli a4, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui a7, 61681 +; RV32IM-NEXT: lui ra, 209715 +; RV32IM-NEXT: lui a1, 349525 +; RV32IM-NEXT: li s0, 1 +; RV32IM-NEXT: lui t1, 1 +; RV32IM-NEXT: lui t2, 2 +; RV32IM-NEXT: lui t3, 4 +; RV32IM-NEXT: lui t4, 8 +; RV32IM-NEXT: lui t0, 32 +; RV32IM-NEXT: lui a6, 64 +; RV32IM-NEXT: lui a5, 128 +; RV32IM-NEXT: lui s1, 256 +; RV32IM-NEXT: lui t5, 512 +; RV32IM-NEXT: lui t6, 1024 +; RV32IM-NEXT: lui s4, 2048 +; RV32IM-NEXT: lui s2, 4096 +; RV32IM-NEXT: lui s3, 8192 +; RV32IM-NEXT: lui s7, 16384 +; RV32IM-NEXT: lui s5, 32768 +; RV32IM-NEXT: lui s6, 65536 +; RV32IM-NEXT: lui s11, 131072 +; RV32IM-NEXT: lui s8, 262144 +; RV32IM-NEXT: addi s10, s9, -256 +; RV32IM-NEXT: and a3, a3, s10 +; RV32IM-NEXT: or a3, a3, a4 +; RV32IM-NEXT: addi a7, a7, -241 +; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi a4, ra, 819 +; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi a1, a1, 1365 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: slli s0, s0, 11 +; RV32IM-NEXT: and a0, a0, s10 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a0, a3 +; RV32IM-NEXT: srli a2, a0, 4 +; RV32IM-NEXT: and a0, a0, a7 +; RV32IM-NEXT: and a2, a2, a7 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a0, 2 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a0, 1 +; RV32IM-NEXT: and a0, a0, a1 +; RV32IM-NEXT: and a2, a2, a1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a3, a2, a0 +; RV32IM-NEXT: andi a0, a3, 2 +; RV32IM-NEXT: andi a1, a3, 1 +; RV32IM-NEXT: and a4, a3, s0 +; RV32IM-NEXT: and a7, a3, t1 +; RV32IM-NEXT: and s0, a3, t2 +; RV32IM-NEXT: and ra, a3, t3 +; RV32IM-NEXT: and a2, a3, t4 +; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s9 +; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, t0 +; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a6, a3, a6 +; RV32IM-NEXT: and a5, a3, a5 +; RV32IM-NEXT: and s1, a3, s1 +; RV32IM-NEXT: sw s1, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, t5 +; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t6, a3, t6 +; RV32IM-NEXT: and a2, a3, s4 +; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s2, a3, s2 +; RV32IM-NEXT: and a2, a3, s3 +; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s7 +; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s5 +; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s6 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s11 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a3, s8 +; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: and a2, a3, a2 +; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a3, a1 +; RV32IM-NEXT: sw a0, 16(sp) # 
4-byte Folded Spill +; RV32IM-NEXT: andi a0, a3, 4 +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a3, 8 +; RV32IM-NEXT: mul a0, a3, a1 +; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a2, a3, 16 +; RV32IM-NEXT: mul s9, a3, a2 +; RV32IM-NEXT: andi t0, a3, 32 +; RV32IM-NEXT: mul s6, a3, t0 +; RV32IM-NEXT: andi t1, a3, 64 +; RV32IM-NEXT: mul a0, a3, t1 +; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t2, a3, 128 +; RV32IM-NEXT: mul a0, a3, t2 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t2, a3, 256 +; RV32IM-NEXT: mul s1, a3, t2 +; RV32IM-NEXT: andi t3, a3, 512 +; RV32IM-NEXT: mul t5, a3, t3 +; RV32IM-NEXT: andi t4, a3, 1024 +; RV32IM-NEXT: mul s5, a3, t4 +; RV32IM-NEXT: mul s8, a3, a4 +; RV32IM-NEXT: mul a0, a3, a7 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t2, a3, s0 +; RV32IM-NEXT: mul a7, a3, ra +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a3, a0 +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s4, a3, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a3, a0 +; RV32IM-NEXT: mul a0, a3, a6 +; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a6, a3, a5 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a3, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a3, a0 +; RV32IM-NEXT: mul t4, a3, t6 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a3, a0 +; RV32IM-NEXT: mul a2, a3, s2 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a3, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a3, a0 +; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a3, a0 +; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t3, a3, a0 +; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a3, a0 +; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a3, a0 +; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a3, a0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, s11, a0 +; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s11, s11, ra +; RV32IM-NEXT: xor s6, s9, s6 +; RV32IM-NEXT: xor t5, s1, t5 +; RV32IM-NEXT: xor a7, t2, a7 +; RV32IM-NEXT: xor a4, a6, a4 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: xor a0, a0, s11 +; RV32IM-NEXT: lw a2, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, s6, a2 +; RV32IM-NEXT: xor a6, t5, s5 +; RV32IM-NEXT: xor a7, a7, s0 +; RV32IM-NEXT: xor a4, a4, t1 +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: xor a0, a0, a2 +; RV32IM-NEXT: xor a2, a6, s8 +; RV32IM-NEXT: xor a5, a7, s4 +; RV32IM-NEXT: xor a4, a4, t4 +; RV32IM-NEXT: xor a1, a1, t0 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a5, a5, s7 +; RV32IM-NEXT: xor a4, a4, s3 +; RV32IM-NEXT: xor a1, a1, t3 +; RV32IM-NEXT: lw a6, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a1, a1, t6 +; RV32IM-NEXT: xor a2, a0, a2 +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: xor 
a1, a1, s2 +; RV32IM-NEXT: xor a2, a2, a4 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: and a3, a2, s10 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s10 +; RV32IM-NEXT: srli a1, a1, 24 +; RV32IM-NEXT: or a0, a0, a3 +; RV32IM-NEXT: or a1, a2, a1 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a2 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i32: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a2, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li a3, 255 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s3, 16 +; RV64IM-NEXT: srli s0, a0, 56 +; RV64IM-NEXT: srliw t2, a0, 24 +; RV64IM-NEXT: slli t0, a0, 56 +; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: lui t4, 209715 +; RV64IM-NEXT: lui t6, 349525 +; RV64IM-NEXT: li a7, 1 +; RV64IM-NEXT: lui s5, 2 +; RV64IM-NEXT: lui t1, 4 +; RV64IM-NEXT: lui a4, 128 +; RV64IM-NEXT: lui s7, 256 +; RV64IM-NEXT: lui s8, 4096 +; RV64IM-NEXT: lui s10, 8192 +; RV64IM-NEXT: lui a1, 4080 +; RV64IM-NEXT: and a2, a2, a1 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: addi s1, s3, -256 +; RV64IM-NEXT: and t5, a0, a1 +; RV64IM-NEXT: slli a1, t2, 32 +; RV64IM-NEXT: addi s9, t3, -241 +; RV64IM-NEXT: addi t4, t4, 819 +; RV64IM-NEXT: addi t2, t6, 1365 +; RV64IM-NEXT: slli t3, a7, 11 +; RV64IM-NEXT: slli s11, a7, 32 +; RV64IM-NEXT: slli ra, a7, 33 +; RV64IM-NEXT: slli t6, a7, 34 +; RV64IM-NEXT: slli s2, a7, 35 +; RV64IM-NEXT: 
slli s4, a7, 36 +; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a6, a3 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: slli a3, a7, 37 +; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a5, s1 +; RV64IM-NEXT: or a3, a3, s0 +; RV64IM-NEXT: slli a5, a7, 38 +; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, t5, 24 +; RV64IM-NEXT: and a0, a0, s1 +; RV64IM-NEXT: or a1, t5, a1 +; RV64IM-NEXT: slli a5, s9, 32 +; RV64IM-NEXT: add a5, s9, a5 +; RV64IM-NEXT: slli s0, t4, 32 +; RV64IM-NEXT: add t4, t4, s0 +; RV64IM-NEXT: slli s4, t2, 32 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: add t2, t2, s4 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a0, t0, a0 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t4 +; RV64IM-NEXT: and a1, a1, t4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t2 +; RV64IM-NEXT: and a1, a1, t2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or t0, a1, a0 +; RV64IM-NEXT: andi a0, t0, 2 +; RV64IM-NEXT: andi a1, t0, 1 +; RV64IM-NEXT: andi a2, t0, 4 +; RV64IM-NEXT: andi a3, t0, 8 +; RV64IM-NEXT: andi a5, t0, 16 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 32 +; RV64IM-NEXT: mul a1, t0, a2 +; RV64IM-NEXT: mul a2, t0, a3 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, t0, 256 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a2, a0 +; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 512 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a7, 39 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 40 +; RV64IM-NEXT: and a1, t0, a4 +; RV64IM-NEXT: and a2, t0, s7 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 41 +; RV64IM-NEXT: and a2, t0, s8 +; RV64IM-NEXT: and a3, t0, s10 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 48 +; RV64IM-NEXT: and a3, t0, s11 +; RV64IM-NEXT: and a4, t0, ra +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: mul a4, t0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a7, 49 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 56 +; 
RV64IM-NEXT: and a1, t0, a2 +; RV64IM-NEXT: and a2, t0, a3 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 57 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 42 +; RV64IM-NEXT: slli ra, a7, 43 +; RV64IM-NEXT: slli a3, a7, 44 +; RV64IM-NEXT: slli a4, a7, 45 +; RV64IM-NEXT: slli t5, a7, 46 +; RV64IM-NEXT: slli s0, a7, 47 +; RV64IM-NEXT: slli s1, a7, 50 +; RV64IM-NEXT: slli a0, a7, 51 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 52 +; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 53 +; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 54 +; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 55 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 58 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 59 +; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 60 +; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 61 +; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a7, a7, 62 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, t3 +; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: and a0, t0, s7 +; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 8 +; RV64IM-NEXT: and a0, t0, s8 +; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s3 +; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s6, 32 +; RV64IM-NEXT: and a0, t0, s6 +; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 64 +; RV64IM-NEXT: and a0, t0, s10 +; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 512 +; RV64IM-NEXT: and a0, t0, s11 +; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s4, 1024 +; RV64IM-NEXT: and a0, t0, s4 +; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s5, 2048 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 16384 +; RV64IM-NEXT: and a0, t0, s9 +; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a5, 32768 +; RV64IM-NEXT: and a5, t0, a5 +; RV64IM-NEXT: lui a6, 65536 +; RV64IM-NEXT: and a6, t0, a6 +; RV64IM-NEXT: lui t1, 131072 +; RV64IM-NEXT: and t1, t0, t1 +; RV64IM-NEXT: lui t2, 262144 +; RV64IM-NEXT: and t2, t0, t2 +; RV64IM-NEXT: and a0, t0, t6 +; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s2 +; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, t4 +; RV64IM-NEXT: and a7, t0, a2 +; RV64IM-NEXT: and ra, t0, ra +; RV64IM-NEXT: and t3, t0, a3 +; RV64IM-NEXT: and t4, t0, a4 +; RV64IM-NEXT: and t5, t0, t5 +; RV64IM-NEXT: and t6, 
t0, s0 +; RV64IM-NEXT: and s0, t0, s1 +; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, t0, a2 +; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, t0, a2 +; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, t0, a2 +; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, t0, a2 +; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, t0, a2 +; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, t0, a2 +; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, t0, a2 +; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, t0, a2 +; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, t0, a2 +; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, t0, a2 +; RV64IM-NEXT: andi s11, t0, 64 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 128 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 1024 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, t0, a2 +; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, t0, a2 +; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, t0, a2 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, a6 +; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t1 +; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t2 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srliw t2, t0, 31 +; RV64IM-NEXT: slli t2, t2, 31 +; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a5, t0, a5 +; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, t0, a6 +; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, t0, a6 +; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a1 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a7, t0, a7 +; RV64IM-NEXT: mul ra, t0, ra +; RV64IM-NEXT: mul a6, t0, t3 +; RV64IM-NEXT: mul t4, t0, t4 +; RV64IM-NEXT: mul 
t5, t0, t5 +; RV64IM-NEXT: mul a0, t0, t6 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t6, t0, s0 +; RV64IM-NEXT: mul s0, t0, s1 +; RV64IM-NEXT: mul s1, t0, s2 +; RV64IM-NEXT: mul s2, t0, s3 +; RV64IM-NEXT: mul s3, t0, s4 +; RV64IM-NEXT: mul s4, t0, s5 +; RV64IM-NEXT: mul s5, t0, s6 +; RV64IM-NEXT: mul s6, t0, s7 +; RV64IM-NEXT: mul s7, t0, s8 +; RV64IM-NEXT: mul s8, t0, s9 +; RV64IM-NEXT: mul s9, t0, s10 +; RV64IM-NEXT: srli s10, t0, 63 +; RV64IM-NEXT: slli s10, s10, 63 +; RV64IM-NEXT: mul t2, t0, t2 +; RV64IM-NEXT: mul t0, t0, s10 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, a0, a1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s11, t3, s11 +; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, t3, a4 +; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, t3, a3 +; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, t3, a2 +; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, t3, a7 +; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t6, t3, t6 +; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, t3, s5 +; RV64IM-NEXT: xor a0, s10, a0 +; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, s11, t3 +; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a5, a7, ra +; RV64IM-NEXT: xor a7, t6, s0 +; RV64IM-NEXT: xor t6, s5, s6 +; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, t3 +; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s10, t3 +; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: xor a6, a7, s1 +; RV64IM-NEXT: xor a7, t6, s7 +; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s0, t1 +; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t3 +; RV64IM-NEXT: xor a5, a5, t4 +; RV64IM-NEXT: xor a6, a6, s2 +; RV64IM-NEXT: xor a7, a7, s8 +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, t5 +; RV64IM-NEXT: xor a6, a6, s3 +; RV64IM-NEXT: xor a7, a7, s9 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a4 +; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, 
a6, s4 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: xor a6, a7, t0 +; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a1, t0 +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: or a0, a0, a7 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a2, a1, a7 +; RV64IM-NEXT: xor a4, a1, a4 +; RV64IM-NEXT: srli a1, a1, 8 +; RV64IM-NEXT: slli a2, a2, 24 +; RV64IM-NEXT: xor a5, a4, a5 +; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, a1, a3 +; RV64IM-NEXT: srli a4, a4, 24 +; RV64IM-NEXT: srliw a3, a5, 24 +; RV64IM-NEXT: and a4, a4, a7 +; RV64IM-NEXT: srli a7, a5, 40 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: slli a3, a3, 32 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: and a4, a7, t0 +; RV64IM-NEXT: srli a5, a5, 56 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a4, a4, a5 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i32 @llvm.clmulr.i32(i32 %a, i32 %b) + ret i32 %res +} + +define i64 @clmulr_i64(i64 %a, i64 %b) nounwind { +; RV32IM-LABEL: clmulr_i64: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -512 +; RV32IM-NEXT: sw ra, 508(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 504(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 500(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 496(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 492(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 488(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 484(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 480(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 476(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 472(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 468(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 464(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 460(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t3, a0, 8 +; RV32IM-NEXT: lui s8, 16 +; RV32IM-NEXT: srli t4, a0, 24 +; RV32IM-NEXT: slli s2, a0, 24 +; RV32IM-NEXT: lui t5, 61681 +; RV32IM-NEXT: lui t6, 209715 +; RV32IM-NEXT: lui s0, 349525 +; RV32IM-NEXT: srli s4, a1, 8 +; RV32IM-NEXT: srli s1, a1, 24 +; RV32IM-NEXT: 
slli s3, a1, 24 +; RV32IM-NEXT: li s10, 1 +; RV32IM-NEXT: lui a3, 1 +; RV32IM-NEXT: lui a4, 2 +; RV32IM-NEXT: lui a5, 4 +; RV32IM-NEXT: lui a6, 8 +; RV32IM-NEXT: lui a7, 32 +; RV32IM-NEXT: lui t0, 64 +; RV32IM-NEXT: lui t1, 128 +; RV32IM-NEXT: lui t2, 256 +; RV32IM-NEXT: lui a2, 512 +; RV32IM-NEXT: addi s7, s8, -256 +; RV32IM-NEXT: sw s7, 396(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi s6, t5, -241 +; RV32IM-NEXT: addi s5, t6, 819 +; RV32IM-NEXT: addi t6, s0, 1365 +; RV32IM-NEXT: slli s10, s10, 11 +; RV32IM-NEXT: and t3, t3, s7 +; RV32IM-NEXT: and a0, a0, s7 +; RV32IM-NEXT: and t5, s4, s7 +; RV32IM-NEXT: and a1, a1, s7 +; RV32IM-NEXT: or t3, t3, t4 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or t4, t5, s1 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, s2, a0 +; RV32IM-NEXT: or a1, s3, a1 +; RV32IM-NEXT: or a0, a0, t3 +; RV32IM-NEXT: or a1, a1, t4 +; RV32IM-NEXT: srli t3, a0, 4 +; RV32IM-NEXT: sw s6, 400(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a0, a0, s6 +; RV32IM-NEXT: srli t4, a1, 4 +; RV32IM-NEXT: and a1, a1, s6 +; RV32IM-NEXT: and t3, t3, s6 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: and t4, t4, s6 +; RV32IM-NEXT: slli a1, a1, 4 +; RV32IM-NEXT: or a0, t3, a0 +; RV32IM-NEXT: or a1, t4, a1 +; RV32IM-NEXT: srli t3, a0, 2 +; RV32IM-NEXT: sw s5, 404(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: srli t4, a1, 2 +; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: and t3, t3, s5 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: and t4, t4, s5 +; RV32IM-NEXT: slli a1, a1, 2 +; RV32IM-NEXT: or a0, t3, a0 +; RV32IM-NEXT: or a1, t4, a1 +; RV32IM-NEXT: srli t3, a0, 1 +; RV32IM-NEXT: sw t6, 408(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: srli t4, a1, 1 +; RV32IM-NEXT: and a1, a1, t6 +; RV32IM-NEXT: and t3, t3, t6 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: and t4, t4, t6 +; RV32IM-NEXT: slli a1, a1, 1 +; RV32IM-NEXT: or s2, t3, a0 +; RV32IM-NEXT: or a0, t4, a1 +; RV32IM-NEXT: and a1, a0, s10 +; RV32IM-NEXT: sw a1, 432(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a3 +; RV32IM-NEXT: sw a1, 436(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a4 +; RV32IM-NEXT: sw a1, 440(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a5 +; RV32IM-NEXT: sw a1, 340(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a6 +; RV32IM-NEXT: sw a1, 412(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, s8 +; RV32IM-NEXT: sw a1, 444(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a7 +; RV32IM-NEXT: sw a1, 452(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and ra, a0, t0 +; RV32IM-NEXT: and a1, a0, t1 +; RV32IM-NEXT: sw a1, 344(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, t2 +; RV32IM-NEXT: sw a1, 448(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a2 +; RV32IM-NEXT: sw a1, 456(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, s10 +; RV32IM-NEXT: sw a1, 384(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a3 +; RV32IM-NEXT: sw a1, 380(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a4 +; RV32IM-NEXT: sw a1, 376(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a5 +; RV32IM-NEXT: sw a1, 368(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a6 +; RV32IM-NEXT: sw a1, 348(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, s8 +; RV32IM-NEXT: sw a1, 336(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a7 +; RV32IM-NEXT: sw a1, 324(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, t0 +; RV32IM-NEXT: sw a1, 320(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, t1 +; RV32IM-NEXT: sw a1, 
312(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, t2 +; RV32IM-NEXT: sw a1, 308(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a2 +; RV32IM-NEXT: sw a1, 300(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 1024 +; RV32IM-NEXT: and a2, a0, a1 +; RV32IM-NEXT: sw a2, 424(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 164(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 2048 +; RV32IM-NEXT: and a2, a0, a1 +; RV32IM-NEXT: sw a2, 428(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 4096 +; RV32IM-NEXT: and a2, a0, a1 +; RV32IM-NEXT: sw a2, 416(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 8192 +; RV32IM-NEXT: and s1, a0, a1 +; RV32IM-NEXT: sw s1, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 16384 +; RV32IM-NEXT: and a2, a0, a1 +; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 32768 +; RV32IM-NEXT: and a2, a0, a1 +; RV32IM-NEXT: sw a2, 420(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 65536 +; RV32IM-NEXT: and t3, a0, a1 +; RV32IM-NEXT: sw t3, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 131072 +; RV32IM-NEXT: and a2, a0, a1 +; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: and t2, a0, a1 +; RV32IM-NEXT: sw t2, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a1, 524288 +; RV32IM-NEXT: and t1, a0, a1 +; RV32IM-NEXT: sw t1, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, s2, a1 +; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t0, a0, 4 +; RV32IM-NEXT: sw t0, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t4, a0, 2 +; RV32IM-NEXT: andi a7, a0, 1 +; RV32IM-NEXT: sw a7, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi t5, a0, 8 +; RV32IM-NEXT: andi a6, a0, 16 +; RV32IM-NEXT: sw a6, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: sw a5, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a4, a0, 64 +; RV32IM-NEXT: sw a4, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a3, a0, 128 +; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a2, a0, 256 +; RV32IM-NEXT: andi a1, a0, 512 +; RV32IM-NEXT: andi s11, a0, 1024 +; RV32IM-NEXT: andi s3, s2, 1 +; RV32IM-NEXT: andi s5, s2, 2 +; RV32IM-NEXT: andi s7, s2, 4 +; RV32IM-NEXT: andi t6, s2, 8 +; RV32IM-NEXT: andi s0, s2, 16 +; RV32IM-NEXT: sw s0, 392(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi s0, s2, 32 +; RV32IM-NEXT: andi s4, s2, 64 +; RV32IM-NEXT: andi s6, s2, 128 +; RV32IM-NEXT: andi s8, s2, 256 +; RV32IM-NEXT: andi s9, s2, 512 +; RV32IM-NEXT: andi s10, s2, 1024 +; RV32IM-NEXT: sw s10, 360(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, t0 +; RV32IM-NEXT: sw s10, 292(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, t4 +; RV32IM-NEXT: sw s10, 288(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, a7 +; RV32IM-NEXT: sw s10, 332(sp) # 4-byte Folded 
Spill +; RV32IM-NEXT: mul s10, s2, t5 +; RV32IM-NEXT: sw s10, 284(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, a6 +; RV32IM-NEXT: sw s10, 280(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, a5 +; RV32IM-NEXT: sw s10, 276(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, a4 +; RV32IM-NEXT: sw s10, 272(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, a3 +; RV32IM-NEXT: sw s10, 268(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, a2 +; RV32IM-NEXT: mv t0, a2 +; RV32IM-NEXT: sw s10, 264(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, a1 +; RV32IM-NEXT: mv a7, a1 +; RV32IM-NEXT: sw s10, 260(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, s2, s11 +; RV32IM-NEXT: mv a6, s11 +; RV32IM-NEXT: sw s10, 256(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s10, 432(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s10, s2, s10 +; RV32IM-NEXT: sw s10, 252(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s10, 436(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s10, s2, s10 +; RV32IM-NEXT: sw s10, 248(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s10, 440(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s10, s2, s10 +; RV32IM-NEXT: sw s10, 244(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s10, 340(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, s2, s10 +; RV32IM-NEXT: sw s11, 240(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s11, 412(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, s2, s11 +; RV32IM-NEXT: sw s11, 236(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s11, 444(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, s2, s11 +; RV32IM-NEXT: sw s11, 232(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s11, 452(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, s2, s11 +; RV32IM-NEXT: sw s11, 228(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s11, s2, ra +; RV32IM-NEXT: sw s11, 224(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mv a5, ra +; RV32IM-NEXT: lw s11, 344(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, s2, s11 +; RV32IM-NEXT: sw ra, 220(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw ra, 448(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, s2, ra +; RV32IM-NEXT: sw ra, 216(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw ra, 456(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, s2, ra +; RV32IM-NEXT: sw ra, 212(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw ra, 424(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, s2, ra +; RV32IM-NEXT: sw ra, 208(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw ra, 428(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, s2, ra +; RV32IM-NEXT: sw ra, 204(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw ra, 416(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, s2, ra +; RV32IM-NEXT: sw ra, 200(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul ra, s2, s1 +; RV32IM-NEXT: sw ra, 196(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw ra, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s1, s2, ra +; RV32IM-NEXT: sw s1, 192(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s1, 420(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s1, s2, s1 +; RV32IM-NEXT: sw s1, 188(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s1, s2, t3 +; RV32IM-NEXT: sw s1, 184(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s1, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, s2, s1 +; RV32IM-NEXT: sw a4, 180(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, s2, t2 +; RV32IM-NEXT: sw a4, 176(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a3, s2, t1 +; RV32IM-NEXT: sw a3, 172(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s2, a0, s3 +; RV32IM-NEXT: sw s2, 352(sp) # 4-byte Folded Spill +; 
RV32IM-NEXT: mul s2, a0, s5 +; RV32IM-NEXT: sw s2, 364(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s2, a0, s7 +; RV32IM-NEXT: sw s2, 372(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a4, a0, t6 +; RV32IM-NEXT: sw a4, 388(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 392(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a1 +; RV32IM-NEXT: sw a4, 392(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a2, a0, s0 +; RV32IM-NEXT: sw a2, 160(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a2, a0, s4 +; RV32IM-NEXT: sw a2, 156(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a2, a0, s6 +; RV32IM-NEXT: sw a2, 304(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a2, a0, s8 +; RV32IM-NEXT: sw a2, 152(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a2, a0, s9 +; RV32IM-NEXT: sw a2, 148(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a1 +; RV32IM-NEXT: sw a2, 296(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 384(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: sw a2, 316(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 380(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a2 +; RV32IM-NEXT: sw a4, 328(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 376(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a2 +; RV32IM-NEXT: sw a4, 356(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 368(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a2 +; RV32IM-NEXT: sw a4, 360(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 348(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a2 +; RV32IM-NEXT: sw a4, 368(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 336(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a2 +; RV32IM-NEXT: sw a4, 376(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 324(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a2 +; RV32IM-NEXT: sw a4, 380(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 320(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a2 +; RV32IM-NEXT: sw a4, 384(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 312(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: sw a2, 144(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 308(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: sw a2, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 300(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: sw a2, 168(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 164(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: sw a2, 308(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a2, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a2 +; RV32IM-NEXT: sw a2, 320(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a1 +; RV32IM-NEXT: sw a2, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a1 +; RV32IM-NEXT: sw a2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a1 +; RV32IM-NEXT: sw a2, 164(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a1 +; RV32IM-NEXT: sw a2, 300(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a0, a1 +; RV32IM-NEXT: sw a2, 312(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a1 +; RV32IM-NEXT: sw a4, 324(sp) # 4-byte 
Folded Spill +; RV32IM-NEXT: lw a1, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a1 +; RV32IM-NEXT: sw a4, 336(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a0, a1 +; RV32IM-NEXT: sw a4, 348(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu t6, a0, t4 +; RV32IM-NEXT: mul a1, a0, t4 +; RV32IM-NEXT: sw a1, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu t2, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu t3, a0, t5 +; RV32IM-NEXT: mul a1, a0, t5 +; RV32IM-NEXT: sw a1, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu t4, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu s0, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu s2, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu s3, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a2, a0, t0 +; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, t0 +; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a2, a0, a7 +; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a7 +; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a2, a0, a6 +; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a6 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 432(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a2, a0, a1 +; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 432(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 436(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a2, a0, a1 +; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 436(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 440(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a2, a0, a1 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a2, a0, s10 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, s10 +; RV32IM-NEXT: sw a1, 340(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 412(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a2, a0, a1 +; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 412(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 444(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a2, a0, a1 +; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 440(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 452(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a2, a0, a1 +; RV32IM-NEXT: sw a2, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 444(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a2, a0, a5 +; 
RV32IM-NEXT: sw a2, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a0, a5 +; RV32IM-NEXT: sw a1, 452(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mv a1, s11 +; RV32IM-NEXT: mulhu s11, a0, s11 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 448(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a5, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu s10, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 344(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu s8, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 424(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 428(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu s9, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 456(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 416(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a7, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a3, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 0(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a2, a0, ra +; RV32IM-NEXT: mul a1, a0, ra +; RV32IM-NEXT: sw a1, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 420(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu t5, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu t0, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 416(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mulhu a6, a0, s1 +; RV32IM-NEXT: mul a1, a0, s1 +; RV32IM-NEXT: sw a1, 420(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a1, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a4, a0, a1 +; RV32IM-NEXT: mul a1, a0, a1 +; RV32IM-NEXT: sw a1, 428(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw t1, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mulhu a1, a0, t1 +; RV32IM-NEXT: mul a0, a0, t1 +; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 292(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t2, a0 +; RV32IM-NEXT: sw a0, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 288(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s7, t6, a0 +; RV32IM-NEXT: lw a0, 284(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s5, t3, a0 +; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s6, t4, a0 +; RV32IM-NEXT: lw a0, 276(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s4, s0, a0 +; RV32IM-NEXT: lw a0, 272(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, s2, a0 +; RV32IM-NEXT: sw a0, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 268(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s3, s3, a0 +; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s2, t1, a0 +; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t1, a0 +; RV32IM-NEXT: sw a0, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t1, a0 +; RV32IM-NEXT: sw a0, 272(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 252(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or 
s0, s0, a0 +; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or t6, t1, a0 +; RV32IM-NEXT: lw a0, 244(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t1, a0 +; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t1, a0 +; RV32IM-NEXT: sw a0, 264(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 236(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t1, a0 +; RV32IM-NEXT: sw a0, 284(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or t4, t1, a0 +; RV32IM-NEXT: lw a0, 228(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or t3, t1, a0 +; RV32IM-NEXT: lw a0, 224(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t1, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t1, a0 +; RV32IM-NEXT: sw a0, 248(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 220(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, s11, a0 +; RV32IM-NEXT: sw a0, 260(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 216(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, a5, a0 +; RV32IM-NEXT: sw a0, 276(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 212(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, s10, a0 +; RV32IM-NEXT: sw a0, 288(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 208(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s8, s8, a0 +; RV32IM-NEXT: lw a0, 204(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s10, s9, a0 +; RV32IM-NEXT: lw a0, 200(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s11, a7, a0 +; RV32IM-NEXT: lw a0, 196(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, a3, a0 +; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: sw a0, 268(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: sw a0, 280(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 184(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or a0, t0, a0 +; RV32IM-NEXT: sw a0, 292(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 180(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or t2, a6, a0 +; RV32IM-NEXT: lw a0, 176(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or t1, a4, a0 +; RV32IM-NEXT: lw s1, 172(sp) # 4-byte Folded Reload +; RV32IM-NEXT: or s1, a1, s1 +; RV32IM-NEXT: lw a0, 160(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 156(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t0, a0, a1 +; RV32IM-NEXT: lw a0, 152(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 148(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t5, a0, a1 +; RV32IM-NEXT: lw a0, 144(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a6, a0, a1 +; RV32IM-NEXT: lw a0, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s9, a0, a1 +; RV32IM-NEXT: lw a0, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a7, a1, a0 +; RV32IM-NEXT: lw a0, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a1, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: lw a1, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw 
a2, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a2 +; RV32IM-NEXT: lw a2, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a3, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a3 +; RV32IM-NEXT: lw a3, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a4, 340(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a4 +; RV32IM-NEXT: lw a4, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a5, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: lw a5, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, ra +; RV32IM-NEXT: lw ra, 332(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s7, ra, s7 +; RV32IM-NEXT: lw ra, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s5, ra, s5 +; RV32IM-NEXT: xor s4, s6, s4 +; RV32IM-NEXT: xor s2, s3, s2 +; RV32IM-NEXT: xor t6, s0, t6 +; RV32IM-NEXT: xor t3, t4, t3 +; RV32IM-NEXT: xor t4, s8, s10 +; RV32IM-NEXT: xor t1, t2, t1 +; RV32IM-NEXT: lw t2, 304(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t0, t0, t2 +; RV32IM-NEXT: lw t2, 296(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t2, t5, t2 +; RV32IM-NEXT: lw t5, 168(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a6, a6, t5 +; RV32IM-NEXT: lw t5, 164(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t5, s9, t5 +; RV32IM-NEXT: xor a0, a7, a0 +; RV32IM-NEXT: lw a7, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a7 +; RV32IM-NEXT: lw a7, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a7 +; RV32IM-NEXT: lw a7, 412(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a7 +; RV32IM-NEXT: lw a7, 344(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a7 +; RV32IM-NEXT: lw a7, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a7 +; RV32IM-NEXT: xor a7, s7, s5 +; RV32IM-NEXT: lw s0, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s0, s4, s0 +; RV32IM-NEXT: lw s3, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s2, s2, s3 +; RV32IM-NEXT: lw s3, 252(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t6, t6, s3 +; RV32IM-NEXT: lw s3, 248(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t3, t3, s3 +; RV32IM-NEXT: xor t4, t4, s11 +; RV32IM-NEXT: xor t1, t1, s1 +; RV32IM-NEXT: lw s1, 316(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t2, t2, s1 +; RV32IM-NEXT: lw s1, 308(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a6, a6, s1 +; RV32IM-NEXT: lw s1, 300(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t5, t5, s1 +; RV32IM-NEXT: xor a0, a0, a1 +; RV32IM-NEXT: lw a1, 432(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a2, a1 +; RV32IM-NEXT: lw a2, 440(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: lw a3, 424(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a4, a3 +; RV32IM-NEXT: lw a4, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a5, a4 +; RV32IM-NEXT: xor a5, a7, s0 +; RV32IM-NEXT: lw a7, 272(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a7, s2, a7 +; RV32IM-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t6, t6, s0 +; RV32IM-NEXT: lw s0, 260(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t3, t3, s0 +; RV32IM-NEXT: lw s0, 256(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t4, t4, s0 +; RV32IM-NEXT: lw s0, 352(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t1, s0 +; RV32IM-NEXT: lw s0, 328(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t2, t2, s0 +; RV32IM-NEXT: lw s0, 320(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a6, a6, s0 +; RV32IM-NEXT: lw s0, 312(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t5, t5, s0 +; RV32IM-NEXT: lw 
s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, s0 +; RV32IM-NEXT: lw s0, 436(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, s0 +; RV32IM-NEXT: lw s0, 444(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, s0 +; RV32IM-NEXT: lw s0, 456(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, s0 +; RV32IM-NEXT: lw s0, 416(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, s0 +; RV32IM-NEXT: xor a5, a5, a7 +; RV32IM-NEXT: lw a7, 284(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a7, t6, a7 +; RV32IM-NEXT: lw t6, 276(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t3, t3, t6 +; RV32IM-NEXT: lw t6, 268(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t4, t4, t6 +; RV32IM-NEXT: lw t6, 364(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t1, t6 +; RV32IM-NEXT: lw t6, 356(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t2, t2, t6 +; RV32IM-NEXT: lw t6, 324(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t5, t5, t6 +; RV32IM-NEXT: lw t6, 452(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, t6 +; RV32IM-NEXT: lw t6, 420(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, t6 +; RV32IM-NEXT: xor a5, a5, a7 +; RV32IM-NEXT: lw a7, 288(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a7, t3, a7 +; RV32IM-NEXT: lw t3, 280(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t3, t4, t3 +; RV32IM-NEXT: lw t4, 372(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t1, t4 +; RV32IM-NEXT: lw t4, 360(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t2, t2, t4 +; RV32IM-NEXT: lw t4, 336(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t4, t5, t4 +; RV32IM-NEXT: xor a1, a0, a1 +; RV32IM-NEXT: xor a1, a1, a2 +; RV32IM-NEXT: lw a2, 428(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a4, a5, a7 +; RV32IM-NEXT: lw a5, 292(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, t3, a5 +; RV32IM-NEXT: lw a7, 388(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: lw t1, 368(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t2, t1 +; RV32IM-NEXT: lw t2, 348(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t2, t4, t2 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: lw a3, 448(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a2, a2, a3 +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: lw a3, 392(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a7, a3 +; RV32IM-NEXT: lw a5, 376(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, t1, a5 +; RV32IM-NEXT: xor a3, a4, a3 +; RV32IM-NEXT: lw a4, 380(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a5, a4 +; RV32IM-NEXT: xor a3, a3, t0 +; RV32IM-NEXT: slli a0, a0, 24 +; RV32IM-NEXT: lw a5, 384(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: lw a7, 396(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a5, a1, a7 +; RV32IM-NEXT: slli a5, a5, 8 +; RV32IM-NEXT: or a0, a0, a5 +; RV32IM-NEXT: xor a2, a1, a2 +; RV32IM-NEXT: srli a1, a1, 8 +; RV32IM-NEXT: and a1, a1, a7 +; RV32IM-NEXT: srli a2, a2, 24 +; RV32IM-NEXT: or a1, a1, a2 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: xor a4, a3, a4 +; RV32IM-NEXT: xor a1, a4, a6 +; RV32IM-NEXT: and a2, a1, a7 +; RV32IM-NEXT: xor a4, a1, t2 +; RV32IM-NEXT: srli a1, a1, 8 +; RV32IM-NEXT: and a1, a1, a7 +; RV32IM-NEXT: srli a5, a0, 4 +; RV32IM-NEXT: lw a6, 400(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a6 +; RV32IM-NEXT: and a5, a5, a6 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a5, a0 +; RV32IM-NEXT: slli a3, a3, 24 +; RV32IM-NEXT: slli a2, a2, 8 +; RV32IM-NEXT: or a2, a3, a2 +; RV32IM-NEXT: srli a4, a4, 24 +; RV32IM-NEXT: or a1, a1, a4 +; 
RV32IM-NEXT: or a1, a2, a1 +; RV32IM-NEXT: srli a2, a0, 2 +; RV32IM-NEXT: lw a3, 404(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a3 +; RV32IM-NEXT: and a2, a2, a3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: srli a2, a1, 4 +; RV32IM-NEXT: and a1, a1, a6 +; RV32IM-NEXT: and a2, a2, a6 +; RV32IM-NEXT: slli a1, a1, 4 +; RV32IM-NEXT: or a1, a2, a1 +; RV32IM-NEXT: srli a2, a1, 2 +; RV32IM-NEXT: and a1, a1, a3 +; RV32IM-NEXT: and a2, a2, a3 +; RV32IM-NEXT: srli a3, a0, 1 +; RV32IM-NEXT: lw a5, 408(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a4, a0, a5 +; RV32IM-NEXT: and a3, a3, a5 +; RV32IM-NEXT: slli a1, a1, 2 +; RV32IM-NEXT: or a1, a2, a1 +; RV32IM-NEXT: srli a0, a1, 1 +; RV32IM-NEXT: and a1, a1, a5 +; RV32IM-NEXT: and a0, a0, a5 +; RV32IM-NEXT: slli a1, a1, 1 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: slli a1, a4, 1 +; RV32IM-NEXT: or a1, a3, a1 +; RV32IM-NEXT: lw ra, 508(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 504(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 500(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 496(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 492(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 488(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 484(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 480(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 476(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 472(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 468(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 464(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 460(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 512 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i64: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -448 +; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a2, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li a3, 255 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s3, 16 +; RV64IM-NEXT: srli s0, a0, 56 +; RV64IM-NEXT: srliw t2, a0, 24 +; RV64IM-NEXT: slli t0, a0, 56 +; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: lui t4, 209715 +; RV64IM-NEXT: lui t6, 349525 +; RV64IM-NEXT: li a7, 1 +; RV64IM-NEXT: lui s5, 2 +; RV64IM-NEXT: lui t1, 4 +; RV64IM-NEXT: lui a4, 128 +; RV64IM-NEXT: lui s7, 256 +; RV64IM-NEXT: lui s8, 4096 +; RV64IM-NEXT: lui s10, 8192 +; RV64IM-NEXT: lui a1, 4080 +; RV64IM-NEXT: and a2, a2, a1 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: addi s1, s3, -256 +; RV64IM-NEXT: and t5, a0, a1 +; RV64IM-NEXT: slli a1, t2, 32 +; RV64IM-NEXT: addi s9, t3, -241 +; RV64IM-NEXT: addi t4, t4, 819 +; RV64IM-NEXT: addi t2, t6, 1365 +; RV64IM-NEXT: slli t3, a7, 11 +; RV64IM-NEXT: slli s11, a7, 32 +; RV64IM-NEXT: slli ra, a7, 33 +; RV64IM-NEXT: slli t6, a7, 34 +; RV64IM-NEXT: slli s2, a7, 35 +; RV64IM-NEXT: slli s4, a7, 36 +; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded 
Spill +; RV64IM-NEXT: and a3, a6, a3 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: slli a3, a7, 37 +; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a3, a5, s1 +; RV64IM-NEXT: or a3, a3, s0 +; RV64IM-NEXT: slli a5, a7, 38 +; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t5, t5, 24 +; RV64IM-NEXT: and a0, a0, s1 +; RV64IM-NEXT: or a1, t5, a1 +; RV64IM-NEXT: slli a5, s9, 32 +; RV64IM-NEXT: add a5, s9, a5 +; RV64IM-NEXT: slli s0, t4, 32 +; RV64IM-NEXT: add t4, t4, s0 +; RV64IM-NEXT: slli s4, t2, 32 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: add t2, t2, s4 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a0, t0, a0 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t4 +; RV64IM-NEXT: and a1, a1, t4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, t2 +; RV64IM-NEXT: and a1, a1, t2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or t0, a1, a0 +; RV64IM-NEXT: andi a0, t0, 2 +; RV64IM-NEXT: andi a1, t0, 1 +; RV64IM-NEXT: andi a2, t0, 4 +; RV64IM-NEXT: andi a3, t0, 8 +; RV64IM-NEXT: andi a5, t0, 16 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 32 +; RV64IM-NEXT: mul a1, t0, a2 +; RV64IM-NEXT: mul a2, t0, a3 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, t0, 256 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a2, a0 +; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a0, t0, 512 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t4, a7, 39 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 40 +; RV64IM-NEXT: and a1, t0, a4 +; RV64IM-NEXT: and a2, t0, s7 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 41 +; RV64IM-NEXT: and a2, t0, s8 +; RV64IM-NEXT: and a3, t0, s10 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 48 +; RV64IM-NEXT: and a3, t0, s11 +; RV64IM-NEXT: and a4, t0, ra +; RV64IM-NEXT: mul a3, t0, a3 +; RV64IM-NEXT: mul a4, t0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, a7, 49 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 56 +; RV64IM-NEXT: and a1, t0, a2 +; RV64IM-NEXT: and a2, t0, a3 +; 
RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, a7, 57 +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, a1 +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: mul a1, t0, a1 +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, a7, 42 +; RV64IM-NEXT: slli ra, a7, 43 +; RV64IM-NEXT: slli a3, a7, 44 +; RV64IM-NEXT: slli a4, a7, 45 +; RV64IM-NEXT: slli t5, a7, 46 +; RV64IM-NEXT: slli s0, a7, 47 +; RV64IM-NEXT: slli s1, a7, 50 +; RV64IM-NEXT: slli a0, a7, 51 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 52 +; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 53 +; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 54 +; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 55 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 58 +; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 59 +; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 60 +; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a7, 61 +; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a7, a7, 62 +; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, t3 +; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 1 +; RV64IM-NEXT: and a0, t0, s7 +; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 8 +; RV64IM-NEXT: and a0, t0, s8 +; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s3 +; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s6, 32 +; RV64IM-NEXT: and a0, t0, s6 +; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 64 +; RV64IM-NEXT: and a0, t0, s10 +; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 512 +; RV64IM-NEXT: and a0, t0, s11 +; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s4, 1024 +; RV64IM-NEXT: and a0, t0, s4 +; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s5, 2048 +; RV64IM-NEXT: and a0, t0, s5 +; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 16384 +; RV64IM-NEXT: and a0, t0, s9 +; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a5, 32768 +; RV64IM-NEXT: and a5, t0, a5 +; RV64IM-NEXT: lui a6, 65536 +; RV64IM-NEXT: and a6, t0, a6 +; RV64IM-NEXT: lui t1, 131072 +; RV64IM-NEXT: and t1, t0, t1 +; RV64IM-NEXT: lui t2, 262144 +; RV64IM-NEXT: and t2, t0, t2 +; RV64IM-NEXT: and a0, t0, t6 +; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, t0, s2 +; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, t0, a0 +; RV64IM-NEXT: and a1, t0, t4 +; RV64IM-NEXT: and a7, t0, a2 +; RV64IM-NEXT: and ra, t0, ra +; RV64IM-NEXT: and t3, t0, a3 +; RV64IM-NEXT: and t4, t0, a4 +; RV64IM-NEXT: and t5, t0, t5 +; RV64IM-NEXT: and t6, t0, s0 +; RV64IM-NEXT: and s0, t0, s1 +; RV64IM-NEXT: ld a2, 
184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, t0, a2 +; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, t0, a2 +; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, t0, a2 +; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, t0, a2 +; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, t0, a2 +; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, t0, a2 +; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, t0, a2 +; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, t0, a2 +; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, t0, a2 +; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, t0, a2 +; RV64IM-NEXT: andi s11, t0, 64 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 128 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi s11, t0, 1024 +; RV64IM-NEXT: mul a2, t0, s11 +; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, t0, a2 +; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a4, t0, a2 +; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, t0, a2 +; RV64IM-NEXT: mul a2, t0, a5 +; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, a6 +; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t1 +; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a2, t0, t2 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srliw t2, t0, 31 +; RV64IM-NEXT: slli t2, t2, 31 +; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, t0, a2 +; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a5, t0, a5 +; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, t0, a6 +; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, t0, a6 +; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a0 +; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a0, t0, a1 +; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a7, t0, a7 +; RV64IM-NEXT: mul ra, t0, ra +; RV64IM-NEXT: mul a6, t0, t3 +; RV64IM-NEXT: mul t4, t0, t4 +; RV64IM-NEXT: mul t5, t0, t5 +; RV64IM-NEXT: mul a0, t0, t6 +; RV64IM-NEXT: sd 
a0, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t6, t0, s0 +; RV64IM-NEXT: mul s0, t0, s1 +; RV64IM-NEXT: mul s1, t0, s2 +; RV64IM-NEXT: mul s2, t0, s3 +; RV64IM-NEXT: mul s3, t0, s4 +; RV64IM-NEXT: mul s4, t0, s5 +; RV64IM-NEXT: mul s5, t0, s6 +; RV64IM-NEXT: mul s6, t0, s7 +; RV64IM-NEXT: mul s7, t0, s8 +; RV64IM-NEXT: mul s8, t0, s9 +; RV64IM-NEXT: mul s9, t0, s10 +; RV64IM-NEXT: srli s10, t0, 63 +; RV64IM-NEXT: slli s10, s10, 63 +; RV64IM-NEXT: mul t2, t0, t2 +; RV64IM-NEXT: mul t0, t0, s10 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, a0, a1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, a1 +; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s11, t3, s11 +; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, t3, a4 +; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, t3, a3 +; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, t3, a2 +; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, t3, a7 +; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t6, t3, t6 +; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, t3, s5 +; RV64IM-NEXT: xor a0, s10, a0 +; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s10, s11, t3 +; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a5, a7, ra +; RV64IM-NEXT: xor a7, t6, s0 +; RV64IM-NEXT: xor t6, s5, s6 +; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a0, t3 +; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, t3 +; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s10, t3 +; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a4, t3 +; RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: xor a6, a7, s1 +; RV64IM-NEXT: xor a7, t6, s7 +; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s0, t1 +; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t3 +; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t3 +; RV64IM-NEXT: xor a5, a5, t4 +; RV64IM-NEXT: xor a6, a6, s2 +; RV64IM-NEXT: xor a7, a7, s8 +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: xor a1, a1, t1 +; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a5, a5, t5 +; RV64IM-NEXT: xor a6, a6, s3 +; RV64IM-NEXT: xor a7, a7, s9 +; RV64IM-NEXT: xor a1, a1, a4 +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a4 +; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: xor a5, a6, s4 +; RV64IM-NEXT: slli a0, a0, 56 +; RV64IM-NEXT: xor 
a6, a7, t0 +; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a1, t0 +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: or a0, a0, a7 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a2, a1, a7 +; RV64IM-NEXT: xor a4, a1, a4 +; RV64IM-NEXT: srli a1, a1, 8 +; RV64IM-NEXT: slli a2, a2, 24 +; RV64IM-NEXT: xor a5, a4, a5 +; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, a1, a3 +; RV64IM-NEXT: srli a4, a4, 24 +; RV64IM-NEXT: srliw a3, a5, 24 +; RV64IM-NEXT: and a4, a4, a7 +; RV64IM-NEXT: srli a7, a5, 40 +; RV64IM-NEXT: xor a5, a5, a6 +; RV64IM-NEXT: slli a3, a3, 32 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: and a4, a7, t0 +; RV64IM-NEXT: srli a5, a5, 56 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: or a4, a4, a5 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 448 +; RV64IM-NEXT: ret + %res = call i64 @llvm.clmulr.i64(i64 %a, i64 %b) + ret i64 %res +} + +define i4 @clmulr_constfold_i4() nounwind { +; CHECK-LABEL: clmulr_constfold_i4: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret + %res = call i4 @llvm.clmulr.i4(i4 1, i4 2) + ret i4 %res +} + +define i16 @clmulr_constfold_i16() nounwind { +; RV32IM-LABEL: clmulr_constfold_i16: +; RV32IM: # %bb.0: +; RV32IM-NEXT: lui a0, 699051 +; RV32IM-NEXT: addi a0, a0, -1366 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_constfold_i16: +; RV64IM: # %bb.0: +; RV64IM-NEXT: lui a0, %hi(.LCPI13_0) +; RV64IM-NEXT: ld a0, %lo(.LCPI13_0)(a0) +; RV64IM-NEXT: ret + %res = call i16 @llvm.clmulr.i16(i16 -2, i16 -1) + ret i16 %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll new file mode 100644 index 0000000000000..dd04be1212587 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll @@ -0,0 +1,24188 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV64 + 
+define @clmul_nxv1i32( %x, %y) nounwind { +; CHECK-LABEL: clmul_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 2 +; CHECK-NEXT: vand.vi v11, v9, 1 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v11, v10 +; CHECK-NEXT: vand.vi v11, v9, 4 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vi v11, v9, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; 
CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vxor.vv v8, v10, v8 +; CHECK-NEXT: ret + %a = call @llvm.clmul.nxv1i32( %x, %y) + ret %a +} + +define @clmul_nxv2i32( %x, %y) nounwind { +; CHECK-LABEL: clmul_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 2 +; CHECK-NEXT: vand.vi v11, v9, 1 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v11, v10 +; CHECK-NEXT: vand.vi v11, v9, 4 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vi v11, v9, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 
+; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vxor.vv v8, v10, v8 +; CHECK-NEXT: ret + %a = call @llvm.clmul.nxv2i32( %x, %y) + ret %a +} + +define @clmul_nxv4i32( %x, %y) nounwind { +; CHECK-LABEL: clmul_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vand.vi v12, v10, 2 +; CHECK-NEXT: vand.vi v14, v10, 1 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v14, v12 +; CHECK-NEXT: vand.vi v14, v10, 4 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vi v14, v10, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, 
a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 32 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vand.vx v14, v10, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vxor.vv v12, v12, v14 +; CHECK-NEXT: vmul.vv v8, v8, v10 +; CHECK-NEXT: vxor.vv v8, v12, v8 +; CHECK-NEXT: ret + %a = call @llvm.clmul.nxv4i32( %x, %y) + ret %a +} + +define @clmul_nxv8i32( %x, %y) nounwind { +; CHECK-LABEL: clmul_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vand.vi v12, v8, 2 +; CHECK-NEXT: vand.vi v16, v8, 1 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v16, v12 +; CHECK-NEXT: vand.vi v16, v8, 4 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vi v16, v8, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv 
v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 32 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vxor.vv v12, v12, v16 +; CHECK-NEXT: vand.vx v16, v8, a0 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vxor.vv v8, v12, v8 +; CHECK-NEXT: ret + %a = call @llvm.clmul.nxv8i32( %x, %x) + ret %a +} + +define @clmul_nxv16i32( %x, %y) nounwind { +; CHECK-LABEL: clmul_nxv16i32: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vand.vi v24, v16, 2 +; CHECK-NEXT: vand.vi v0, v16, 1 +; CHECK-NEXT: vmul.vv v24, v8, v24 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v0, v24 +; CHECK-NEXT: vand.vi v0, v16, 4 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vi v0, v16, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 32 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 
+; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vand.vx v0, v16, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vand.vx v16, v16, a0 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vxor.vv v24, v24, v0 +; CHECK-NEXT: vmul.vv v8, v8, v16 +; CHECK-NEXT: vxor.vv v8, v24, v8 +; CHECK-NEXT: ret + %a = call @llvm.clmul.nxv16i32( %x, %y) + ret %a +} + +define @clmul_nxv1i64( %x, %y) nounwind { +; RV32-LABEL: clmul_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: li t5, 1 +; RV32-NEXT: li a4, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li s11, 8 +; RV32-NEXT: li a0, 16 +; RV32-NEXT: li ra, 32 +; RV32-NEXT: li s10, 64 +; RV32-NEXT: li s9, 128 +; RV32-NEXT: li s8, 256 +; RV32-NEXT: li s7, 512 +; RV32-NEXT: li s1, 1024 +; RV32-NEXT: lui s6, 1 +; RV32-NEXT: lui s5, 2 +; RV32-NEXT: lui s4, 4 +; RV32-NEXT: lui s3, 8 +; RV32-NEXT: lui s2, 16 +; RV32-NEXT: lui s0, 32 +; RV32-NEXT: lui t6, 64 +; RV32-NEXT: lui t4, 128 +; RV32-NEXT: lui t3, 256 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: lui t1, 1024 +; RV32-NEXT: lui t0, 2048 +; RV32-NEXT: lui a7, 4096 +; RV32-NEXT: lui a6, 8192 +; RV32-NEXT: lui a5, 16384 +; RV32-NEXT: lui a3, 32768 +; RV32-NEXT: sw a1, 272(sp) +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw zero, 264(sp) +; RV32-NEXT: sw t5, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a4, 260(sp) +; RV32-NEXT: lui a4, 65536 +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw a2, 252(sp) +; RV32-NEXT: lui a2, 131072 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s11, 244(sp) +; RV32-NEXT: vsetvli s11, zero, e64, m1, ta, ma +; RV32-NEXT: vand.vi v13, v9, 2 +; RV32-NEXT: vand.vi v14, v9, 1 +; RV32-NEXT: vand.vi v12, v9, 4 +; RV32-NEXT: vand.vi v11, v9, 8 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw a0, 236(sp) +; RV32-NEXT: vand.vx v10, v9, a0 +; RV32-NEXT: addi s11, sp, 272 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw ra, 228(sp) +; RV32-NEXT: vand.vx v15, v9, ra +; RV32-NEXT: addi ra, sp, 264 +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw s10, 220(sp) +; RV32-NEXT: vand.vx v16, v9, s10 +; RV32-NEXT: addi s10, sp, 256 +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw s9, 212(sp) +; RV32-NEXT: vand.vx v17, v9, s9 +; RV32-NEXT: addi s9, sp, 248 +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw s8, 204(sp) +; RV32-NEXT: vand.vx v18, v9, s8 +; RV32-NEXT: addi s8, sp, 240 +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s7, 196(sp) +; RV32-NEXT: vand.vx v19, 
v9, s7 +; RV32-NEXT: addi s7, sp, 232 +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw s1, 188(sp) +; RV32-NEXT: vand.vx v20, v9, s1 +; RV32-NEXT: slli t5, t5, 11 +; RV32-NEXT: vand.vx v21, v9, s6 +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw t5, 180(sp) +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw s6, 172(sp) +; RV32-NEXT: addi s6, sp, 216 +; RV32-NEXT: vand.vx v22, v9, s5 +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw s5, 164(sp) +; RV32-NEXT: addi s5, sp, 208 +; RV32-NEXT: vand.vx v23, v9, s4 +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw s4, 156(sp) +; RV32-NEXT: addi s4, sp, 200 +; RV32-NEXT: vand.vx v24, v9, s3 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw s3, 148(sp) +; RV32-NEXT: addi s3, sp, 192 +; RV32-NEXT: vand.vx v25, v9, s2 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw s2, 140(sp) +; RV32-NEXT: addi s2, sp, 184 +; RV32-NEXT: vand.vx v26, v9, s0 +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s0, 132(sp) +; RV32-NEXT: addi s1, sp, 176 +; RV32-NEXT: vand.vx v27, v9, t6 +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t6, 124(sp) +; RV32-NEXT: addi s0, sp, 168 +; RV32-NEXT: vand.vx v28, v9, t4 +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t4, 116(sp) +; RV32-NEXT: addi t6, sp, 160 +; RV32-NEXT: vand.vx v29, v9, t3 +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t3, 108(sp) +; RV32-NEXT: addi t4, sp, 152 +; RV32-NEXT: vand.vx v30, v9, t2 +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw t2, 100(sp) +; RV32-NEXT: addi t3, sp, 144 +; RV32-NEXT: vand.vx v31, v9, t1 +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw t1, 92(sp) +; RV32-NEXT: addi t2, sp, 136 +; RV32-NEXT: vand.vx v7, v9, t0 +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw t0, 84(sp) +; RV32-NEXT: addi t1, sp, 128 +; RV32-NEXT: vand.vx v6, v9, a7 +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw a7, 76(sp) +; RV32-NEXT: addi t0, sp, 120 +; RV32-NEXT: vand.vx v5, v9, a6 +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw a6, 68(sp) +; RV32-NEXT: addi a7, sp, 112 +; RV32-NEXT: vand.vx v4, v9, a5 +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw a5, 60(sp) +; RV32-NEXT: addi a6, sp, 104 +; RV32-NEXT: vand.vx v3, v9, a3 +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw a3, 52(sp) +; RV32-NEXT: addi a5, sp, 96 +; RV32-NEXT: vand.vx v2, v9, a4 +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw a4, 44(sp) +; RV32-NEXT: addi a4, sp, 88 +; RV32-NEXT: vand.vx v1, v9, a2 +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: addi a3, sp, 80 +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: lui a0, 262144 +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: addi a2, sp, 72 +; RV32-NEXT: vand.vx v0, v9, t5 +; RV32-NEXT: addi a1, sp, 64 +; RV32-NEXT: vmul.vv v13, v8, v13 +; RV32-NEXT: vmul.vv v14, v8, v14 +; RV32-NEXT: vxor.vi v14, v14, 0 +; RV32-NEXT: vxor.vv v14, v14, v13 +; RV32-NEXT: vlse64.v v13, (s11), zero +; RV32-NEXT: addi s11, sp, 56 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v14, v14, v12 +; RV32-NEXT: vlse64.v v12, (ra), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv ra, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, ra +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs1r.v v12, (t5) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi ra, sp, 48 +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v14, v14, v11 +; RV32-NEXT: vlse64.v v11, (s10), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli s10, t5, 2 +; RV32-NEXT: add t5, s10, t5 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 
288 +; RV32-NEXT: vs1r.v v11, (t5) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi s10, sp, 40 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v14, v14, v10 +; RV32-NEXT: vlse64.v v10, (s9), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs1r.v v10, (t5) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi t5, sp, 32 +; RV32-NEXT: vmul.vv v15, v8, v15 +; RV32-NEXT: vxor.vv v15, v14, v15 +; RV32-NEXT: vlse64.v v10, (s8), zero +; RV32-NEXT: csrr s8, vlenb +; RV32-NEXT: slli s9, s8, 1 +; RV32-NEXT: add s8, s9, s8 +; RV32-NEXT: add s8, sp, s8 +; RV32-NEXT: addi s8, s8, 288 +; RV32-NEXT: vs1r.v v10, (s8) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi s8, sp, 24 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v16, v15, v16 +; RV32-NEXT: vlse64.v v10, (s7), zero +; RV32-NEXT: csrr s7, vlenb +; RV32-NEXT: slli s7, s7, 1 +; RV32-NEXT: add s7, sp, s7 +; RV32-NEXT: addi s7, s7, 288 +; RV32-NEXT: vs1r.v v10, (s7) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi s7, sp, 16 +; RV32-NEXT: vmul.vv v17, v8, v17 +; RV32-NEXT: vmul.vv v18, v8, v18 +; RV32-NEXT: vmul.vv v19, v8, v19 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vmul.vv v21, v8, v21 +; RV32-NEXT: vmul.vv v22, v8, v22 +; RV32-NEXT: vmul.vv v23, v8, v23 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vmul.vv v25, v8, v25 +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vmul.vv v27, v8, v27 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vmul.vv v29, v8, v29 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vmul.vv v31, v8, v31 +; RV32-NEXT: vmul.vv v7, v8, v7 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: vmul.vv v5, v8, v5 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vmul.vv v3, v8, v3 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vmul.vv v1, v8, v1 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v16, v16, v17 +; RV32-NEXT: addi s9, sp, 224 +; RV32-NEXT: vlse64.v v11, (s9), zero +; RV32-NEXT: vxor.vv v16, v16, v18 +; RV32-NEXT: vlse64.v v10, (s6), zero +; RV32-NEXT: csrr s6, vlenb +; RV32-NEXT: add s6, sp, s6 +; RV32-NEXT: addi s6, s6, 288 +; RV32-NEXT: vs1r.v v10, (s6) # vscale x 8-byte Folded Spill +; RV32-NEXT: vxor.vv v16, v16, v19 +; RV32-NEXT: vlse64.v v10, (s5), zero +; RV32-NEXT: addi s5, sp, 288 +; RV32-NEXT: vs1r.v v10, (s5) # vscale x 8-byte Folded Spill +; RV32-NEXT: vxor.vv v16, v16, v20 +; RV32-NEXT: vlse64.v v12, (s4), zero +; RV32-NEXT: vxor.vv v16, v16, v0 +; RV32-NEXT: vlse64.v v0, (s3), zero +; RV32-NEXT: vxor.vv v16, v16, v21 +; RV32-NEXT: vlse64.v v21, (s2), zero +; RV32-NEXT: vxor.vv v16, v16, v22 +; RV32-NEXT: vlse64.v v22, (s1), zero +; RV32-NEXT: vxor.vv v16, v16, v23 +; RV32-NEXT: vlse64.v v23, (s0), zero +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: vlse64.v v24, (t6), zero +; RV32-NEXT: vxor.vv v16, v16, v25 +; RV32-NEXT: vlse64.v v25, (t4), zero +; RV32-NEXT: vxor.vv v16, v16, v26 +; RV32-NEXT: vlse64.v v26, (t3), zero +; RV32-NEXT: vxor.vv v16, v16, v27 +; RV32-NEXT: vlse64.v v27, (t2), zero +; RV32-NEXT: vxor.vv v16, v16, v28 +; RV32-NEXT: vlse64.v v28, (t1), zero +; RV32-NEXT: vxor.vv v16, v16, v29 +; RV32-NEXT: vlse64.v v29, (t0), zero +; RV32-NEXT: vxor.vv v16, v16, v30 +; RV32-NEXT: vlse64.v v30, (a7), zero +; RV32-NEXT: vxor.vv v16, v16, v31 +; RV32-NEXT: vlse64.v v31, (a6), zero +; RV32-NEXT: vxor.vv v16, v16, v7 +; RV32-NEXT: vlse64.v v7, (a5), zero +; RV32-NEXT: vxor.vv v16, v16, v6 +; RV32-NEXT: vlse64.v v6, (a4), zero +; RV32-NEXT: vxor.vv v16, v16, v5 +; RV32-NEXT: vlse64.v 
v5, (a3), zero +; RV32-NEXT: vxor.vv v16, v16, v4 +; RV32-NEXT: vlse64.v v4, (a2), zero +; RV32-NEXT: vxor.vv v16, v16, v3 +; RV32-NEXT: vlse64.v v3, (a1), zero +; RV32-NEXT: vxor.vv v16, v16, v2 +; RV32-NEXT: vlse64.v v2, (s11), zero +; RV32-NEXT: vxor.vv v1, v16, v1 +; RV32-NEXT: vlse64.v v10, (ra), zero +; RV32-NEXT: vand.vv v13, v9, v13 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v14, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v14, v9, v14 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v15, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v15, v9, v15 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v16, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v16, v9, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v17, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v17, v9, v17 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v18, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v18, v9, v18 +; RV32-NEXT: vand.vv v19, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v20, v9, v11 +; RV32-NEXT: addi a1, sp, 288 +; RV32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v11, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v11, v9, v12 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v0, v9, v0 +; RV32-NEXT: vand.vv v21, v9, v21 +; RV32-NEXT: vand.vv v22, v9, v22 +; RV32-NEXT: vand.vv v23, v9, v23 +; RV32-NEXT: vand.vv v24, v9, v24 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vand.vv v26, v9, v26 +; RV32-NEXT: vand.vv v27, v9, v27 +; RV32-NEXT: vand.vv v28, v9, v28 +; RV32-NEXT: vand.vv v29, v9, v29 +; RV32-NEXT: vand.vv v30, v9, v30 +; RV32-NEXT: vand.vv v31, v9, v31 +; RV32-NEXT: vand.vv v7, v9, v7 +; RV32-NEXT: vand.vv v6, v9, v6 +; RV32-NEXT: vand.vv v5, v9, v5 +; RV32-NEXT: vand.vv v4, v9, v4 +; RV32-NEXT: vand.vv v11, v9, v3 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v2, v9, v2 +; RV32-NEXT: vand.vv v10, v9, v10 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vlse64.v v10, (s10), zero +; RV32-NEXT: vlse64.v v3, (t5), zero +; RV32-NEXT: vlse64.v v11, (s8), zero +; 
RV32-NEXT: vlse64.v v12, (s7), zero +; RV32-NEXT: vand.vv v10, v9, v10 +; RV32-NEXT: vand.vv v3, v9, v3 +; RV32-NEXT: vand.vv v11, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v12, v9, v12 +; RV32-NEXT: vand.vx v9, v9, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: vxor.vv v9, v1, v9 +; RV32-NEXT: vmul.vv v11, v8, v13 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v14 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v15 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v16 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v17 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v18 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v19 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v20 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v0 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v21 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v22 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v23 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v24 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v25 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v26 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v27 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v28 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v29 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v30 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v31 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v7 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v6 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v5 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v4 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v2 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: vmul.vv v10, v8, v3 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add 
a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v9, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmul_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vand.vi v10, v9, 2 +; RV64-NEXT: vand.vi v11, v9, 1 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v11, v10 +; RV64-NEXT: vand.vi v11, v9, 4 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vi v11, v9, 8 +; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a0, 256 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, 512 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a2 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 128 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, 
v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 256 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 512 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 1024 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 2048 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 8192 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 16384 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 65536 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 131072 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 262144 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 37 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 41 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 42 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 43 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 44 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 45 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 46 
+; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 47 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 50 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 52 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 54 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 55 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 56 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 57 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 58 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 59 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 60 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 61 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vmul.vv v8, v8, v9 +; RV64-NEXT: vxor.vv v8, v10, v8 +; RV64-NEXT: ret + %a = call @llvm.clmul.nxv1i64( %x, %y) + ret %a +} + +define @clmul_nxv2i64( %x, %y) nounwind { +; RV32-LABEL: clmul_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; 
RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: li s2, 1 +; RV32-NEXT: li a3, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li s7, 8 +; RV32-NEXT: li a0, 16 +; RV32-NEXT: li s6, 32 +; RV32-NEXT: li s5, 64 +; RV32-NEXT: li s4, 128 +; RV32-NEXT: li s1, 256 +; RV32-NEXT: li s0, 512 +; RV32-NEXT: li t5, 1024 +; RV32-NEXT: lui ra, 1 +; RV32-NEXT: lui s8, 2 +; RV32-NEXT: lui s10, 4 +; RV32-NEXT: lui s11, 8 +; RV32-NEXT: lui s9, 16 +; RV32-NEXT: lui s3, 32 +; RV32-NEXT: lui t6, 64 +; RV32-NEXT: lui t4, 128 +; RV32-NEXT: lui t3, 256 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: lui t1, 1024 +; RV32-NEXT: lui t0, 2048 +; RV32-NEXT: lui a7, 4096 +; RV32-NEXT: lui a6, 8192 +; RV32-NEXT: lui a5, 16384 +; RV32-NEXT: lui a4, 32768 +; RV32-NEXT: sw a1, 272(sp) +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw zero, 264(sp) +; RV32-NEXT: sw s2, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a3, 260(sp) +; RV32-NEXT: lui a3, 65536 +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw a2, 252(sp) +; RV32-NEXT: lui a2, 131072 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s7, 244(sp) +; RV32-NEXT: vsetvli s7, zero, e64, m2, ta, ma +; RV32-NEXT: vand.vi v28, v10, 2 +; RV32-NEXT: vand.vi v20, v10, 1 +; RV32-NEXT: vand.vi v30, v10, 4 +; RV32-NEXT: vand.vi v14, v10, 8 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw a0, 236(sp) +; RV32-NEXT: vand.vx v12, v10, a0 +; RV32-NEXT: addi s7, sp, 272 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw s6, 228(sp) +; RV32-NEXT: vand.vx v16, v10, s6 +; RV32-NEXT: addi s6, sp, 264 +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw s5, 220(sp) +; RV32-NEXT: vand.vx v18, v10, s5 +; RV32-NEXT: addi s5, sp, 256 +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw s4, 212(sp) +; RV32-NEXT: vand.vx v0, v10, s4 +; RV32-NEXT: addi s4, sp, 248 +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw s1, 204(sp) +; RV32-NEXT: vand.vx v6, v10, s1 +; RV32-NEXT: addi s1, sp, 240 +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s0, 196(sp) +; RV32-NEXT: vand.vx v4, v10, s0 +; RV32-NEXT: addi s0, sp, 232 +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw t5, 188(sp) +; RV32-NEXT: vand.vx v2, v10, t5 +; RV32-NEXT: slli s2, s2, 11 +; RV32-NEXT: vand.vx v24, v10, ra +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw s2, 180(sp) +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw ra, 172(sp) +; RV32-NEXT: addi t5, sp, 216 +; RV32-NEXT: vand.vx v26, v10, s8 +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw s8, 164(sp) +; RV32-NEXT: addi s8, sp, 208 +; RV32-NEXT: vand.vx v22, v10, s10 +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw s10, 156(sp) +; RV32-NEXT: addi s10, sp, 200 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vi v20, v20, 0 +; RV32-NEXT: vxor.vv v20, v20, v28 +; RV32-NEXT: vand.vx v28, v10, s11 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw s11, 148(sp) +; RV32-NEXT: addi s11, sp, 192 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vxor.vv v20, v20, v30 +; RV32-NEXT: vand.vx v30, v10, s9 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw s9, 140(sp) +; RV32-NEXT: addi s9, sp, 184 +; RV32-NEXT: vmul.vv v14, v8, v14 +; RV32-NEXT: vxor.vv v14, v20, v14 +; RV32-NEXT: vand.vx v20, v10, s3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv ra, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, ra +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v20, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s3, 132(sp) +; RV32-NEXT: addi s3, sp, 176 +; RV32-NEXT: vmul.vv v12, v8, v12 
+; RV32-NEXT: vxor.vv v12, v14, v12 +; RV32-NEXT: vand.vx v14, v10, t6 +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t6, 124(sp) +; RV32-NEXT: addi t6, sp, 168 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v10, t4 +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t4, 116(sp) +; RV32-NEXT: addi t4, sp, 160 +; RV32-NEXT: vmul.vv v18, v8, v18 +; RV32-NEXT: vxor.vv v18, v12, v18 +; RV32-NEXT: vand.vx v12, v10, t3 +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t3, 108(sp) +; RV32-NEXT: addi t3, sp, 152 +; RV32-NEXT: vmul.vv v20, v8, v0 +; RV32-NEXT: vxor.vv v18, v18, v20 +; RV32-NEXT: vand.vx v20, v10, t2 +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw t2, 100(sp) +; RV32-NEXT: addi t2, sp, 144 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: vxor.vv v18, v18, v6 +; RV32-NEXT: vand.vx v6, v10, t1 +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw t1, 92(sp) +; RV32-NEXT: addi t1, sp, 136 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v18, v18, v4 +; RV32-NEXT: vand.vx v4, v10, t0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv ra, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add ra, ra, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, ra +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw t0, 84(sp) +; RV32-NEXT: addi t0, sp, 128 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vxor.vv v18, v18, v2 +; RV32-NEXT: vand.vx v2, v10, s2 +; RV32-NEXT: addi ra, sp, 120 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vxor.vv v18, v18, v2 +; RV32-NEXT: vand.vx v2, v10, a7 +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw a7, 76(sp) +; RV32-NEXT: addi a7, sp, 112 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v18, v18, v24 +; RV32-NEXT: vand.vx v4, v10, a6 +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw a6, 68(sp) +; RV32-NEXT: addi a6, sp, 104 +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vxor.vv v18, v18, v26 +; RV32-NEXT: vand.vx v26, v10, a5 +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw a5, 60(sp) +; RV32-NEXT: addi a5, sp, 96 +; RV32-NEXT: vmul.vv v22, v8, v22 +; RV32-NEXT: vxor.vv v18, v18, v22 +; RV32-NEXT: vand.vx v24, v10, a4 +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw a4, 52(sp) +; RV32-NEXT: addi a4, sp, 88 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v18, v18, v28 +; RV32-NEXT: vand.vx v28, v10, a3 +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw a3, 44(sp) +; RV32-NEXT: addi a3, sp, 80 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vxor.vv v18, v18, v30 +; RV32-NEXT: vand.vx v30, v10, a2 +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: addi a2, sp, 72 +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: lui a0, 262144 +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: addi a1, sp, 64 +; RV32-NEXT: sw a6, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv s2, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, s2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vl2r.v v22, (a6) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v22 +; RV32-NEXT: vxor.vv v0, v18, v0 +; RV32-NEXT: vlse64.v v18, (s7), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv s2, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, s2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; 
RV32-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s7, sp, 56 +; RV32-NEXT: vmul.vv v14, v8, v14 +; RV32-NEXT: vxor.vv v14, v0, v14 +; RV32-NEXT: vlse64.v v18, (s6), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv s2, a6 +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: add a6, a6, s2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s2, sp, 48 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v14, v14, v16 +; RV32-NEXT: vlse64.v v16, (s5), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: mv s5, a6 +; RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: add a6, a6, s5 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v16, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s5, sp, 40 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v12, v14, v12 +; RV32-NEXT: vlse64.v v14, (s4), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 5 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v14, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s4, sp, 32 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v20, v12, v20 +; RV32-NEXT: vlse64.v v12, (s1), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: mv s1, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add s1, s1, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add s1, s1, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, s1 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s1, sp, 24 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: vxor.vv v20, v20, v6 +; RV32-NEXT: vlse64.v v12, (s0), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv s0, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add s0, s0, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, s0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s0, sp, 16 +; RV32-NEXT: csrr s6, vlenb +; RV32-NEXT: slli s6, s6, 1 +; RV32-NEXT: mv a6, s6 +; RV32-NEXT: slli s6, s6, 1 +; RV32-NEXT: add a6, a6, s6 +; RV32-NEXT: slli s6, s6, 3 +; RV32-NEXT: add s6, s6, a6 +; RV32-NEXT: lw a6, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add s6, sp, s6 +; RV32-NEXT: addi s6, s6, 288 +; RV32-NEXT: vl2r.v v12, (s6) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v6, v8, v12 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vxor.vv v20, v20, v6 +; RV32-NEXT: addi s6, sp, 224 +; RV32-NEXT: vlse64.v v0, (s6), zero +; RV32-NEXT: vxor.vv v20, v20, v2 +; RV32-NEXT: vlse64.v v6, (t5), zero +; RV32-NEXT: vxor.vv v20, v20, v4 +; RV32-NEXT: vlse64.v v22, (s8), zero +; RV32-NEXT: vxor.vv v20, v20, v26 +; RV32-NEXT: vlse64.v v18, (s10), zero +; RV32-NEXT: vxor.vv v20, v20, v24 +; RV32-NEXT: vlse64.v v16, (s11), zero +; RV32-NEXT: vxor.vv v20, v20, v28 +; RV32-NEXT: vlse64.v v14, (s9), zero +; RV32-NEXT: vxor.vv v2, v20, v30 +; RV32-NEXT: vlse64.v v12, (s3), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 3 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 
288 +; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v26, v10, v20 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 3 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v4, v10, v20 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 4 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v30, v10, v20 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 5 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v20, v10, v20 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v28, v10, v24 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v24, v10, v24 +; RV32-NEXT: vand.vv v0, v10, v0 +; RV32-NEXT: vand.vv v6, v10, v6 +; RV32-NEXT: vand.vv v22, v10, v22 +; RV32-NEXT: vand.vv v18, v10, v18 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v16, v10, v16 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs2r.v v16, (t5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v14, v10, v14 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs2r.v v14, (t5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 3 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs2r.v v12, (t5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v12, (t6), zero +; RV32-NEXT: vlse64.v v14, (t4), zero +; RV32-NEXT: vlse64.v v16, (t3), zero +; RV32-NEXT: vlse64.v v18, (t2), zero +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v14 +; RV32-NEXT: csrr t2, vlenb +; 
RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v16 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t3, t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v18 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t3, t3, t2 +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v12, (t1), zero +; RV32-NEXT: vlse64.v v14, (t0), zero +; RV32-NEXT: vlse64.v v16, (ra), zero +; RV32-NEXT: vlse64.v v18, (a7), zero +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v14 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 4 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v16 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: add t0, t0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v18 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 3 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v12, (a6), zero +; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: vlse64.v v18, (a3), zero +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 288 +; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v14 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: mv a4, a3 +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a4, a4, a3 +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 288 +; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: mv a4, a3 +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 288 +; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v18 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: mv a4, a3 +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 288 +; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v12, (a2), 
zero +; RV32-NEXT: vlse64.v v14, (a1), zero +; RV32-NEXT: vlse64.v v16, (s7), zero +; RV32-NEXT: vlse64.v v18, (s2), zero +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: addi a1, sp, 288 +; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v14 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a2, a2, a1 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v18 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v14, (s5), zero +; RV32-NEXT: vlse64.v v16, (s4), zero +; RV32-NEXT: vlse64.v v18, (s1), zero +; RV32-NEXT: vlse64.v v12, (s0), zero +; RV32-NEXT: vand.vv v14, v10, v14 +; RV32-NEXT: vand.vv v16, v10, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v18, v10, v18 +; RV32-NEXT: vand.vv v16, v10, v12 +; RV32-NEXT: vand.vx v10, v10, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v10, v2, v10 +; RV32-NEXT: vmul.vv v12, v8, v26 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v4 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v30 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v20 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v28 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v24 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v0 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v6 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v22 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; 
RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; 
RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v14 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v18 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v10, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmul_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64-NEXT: vand.vi v12, v10, 2 +; RV64-NEXT: vand.vi v14, v10, 1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; 
RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v14, v12 +; RV64-NEXT: vand.vi v14, v10, 4 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vi v14, v10, 8 +; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a0 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a0 +; RV64-NEXT: li a0, 256 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: li a1, 512 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a2 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 128 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 256 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 512 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 1024 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 2048 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 8192 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 16384 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 65536 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 131072 
+; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 262144 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 37 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 41 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 42 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 43 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 44 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 45 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 46 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 47 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 50 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 52 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 54 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 55 +; RV64-NEXT: 
vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vand.vx v14, v10, a1
+; RV64-NEXT: slli a1, a0, 56
+; RV64-NEXT: vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vand.vx v14, v10, a1
+; RV64-NEXT: slli a1, a0, 57
+; RV64-NEXT: vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vand.vx v14, v10, a1
+; RV64-NEXT: slli a1, a0, 58
+; RV64-NEXT: vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vand.vx v14, v10, a1
+; RV64-NEXT: slli a1, a0, 59
+; RV64-NEXT: vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vand.vx v14, v10, a1
+; RV64-NEXT: slli a1, a0, 60
+; RV64-NEXT: vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vand.vx v14, v10, a1
+; RV64-NEXT: slli a1, a0, 61
+; RV64-NEXT: vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vand.vx v14, v10, a1
+; RV64-NEXT: li a1, -1
+; RV64-NEXT: slli a0, a0, 62
+; RV64-NEXT: slli a1, a1, 63
+; RV64-NEXT: vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vand.vx v14, v10, a0
+; RV64-NEXT: vand.vx v10, v10, a1
+; RV64-NEXT: vmul.vv v14, v8, v14
+; RV64-NEXT: vxor.vv v12, v12, v14
+; RV64-NEXT: vmul.vv v8, v8, v10
+; RV64-NEXT: vxor.vv v8, v12, v8
+; RV64-NEXT: ret
+ %a = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @clmul_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) nounwind {
+; RV32-LABEL: clmul_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -352
+; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a1, a1, a0
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a1, a1, a0
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: lui a1, 524288
+; RV32-NEXT: li s4, 1
+; RV32-NEXT: li a3, 2
+; RV32-NEXT: li a2, 4
+; RV32-NEXT: li a0, 8
+; RV32-NEXT: li s3, 16
+; RV32-NEXT: li s2, 32
+; RV32-NEXT: li s5, 64
+; RV32-NEXT: li s6, 128
+; RV32-NEXT: li s8, 256
+; RV32-NEXT: li s1, 512
+; RV32-NEXT: li s7, 1024
+; RV32-NEXT: lui ra, 1
+; RV32-NEXT: lui s11, 2
+; RV32-NEXT: lui s10, 4
+; RV32-NEXT: lui s9, 8
+; RV32-NEXT: lui s0, 16
+; RV32-NEXT: lui t6, 32
+; RV32-NEXT: lui t5, 64
+; RV32-NEXT: lui t4, 128
+; RV32-NEXT: lui t3, 256
+; RV32-NEXT: lui t2, 512
+; RV32-NEXT: lui t1, 1024
+; RV32-NEXT: lui t0, 2048
+; RV32-NEXT: lui a7, 4096
+; RV32-NEXT: lui a6, 8192
+; RV32-NEXT: lui a5, 16384
+; RV32-NEXT: lui a4, 32768
+; RV32-NEXT: sw a1, 272(sp)
+; RV32-NEXT: sw zero, 276(sp)
+; RV32-NEXT: sw zero, 264(sp)
+; RV32-NEXT: sw s4, 268(sp)
+; RV32-NEXT: sw zero, 256(sp)
+; RV32-NEXT: sw a3, 260(sp)
+; RV32-NEXT: lui a3, 65536
+; RV32-NEXT: sw zero, 248(sp)
+; RV32-NEXT: sw a2, 252(sp)
+; RV32-NEXT: lui a2, 131072
+; RV32-NEXT: sw zero, 240(sp)
+; RV32-NEXT: sw a0, 244(sp)
+; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT: vand.vi 
v28, v12, 2 +; RV32-NEXT: vand.vi v4, v12, 1 +; RV32-NEXT: vand.vi v24, v12, 4 +; RV32-NEXT: vand.vi v20, v12, 8 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw s3, 236(sp) +; RV32-NEXT: vand.vx v16, v12, s3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi s3, sp, 272 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw s2, 228(sp) +; RV32-NEXT: vand.vx v0, v12, s2 +; RV32-NEXT: addi s2, sp, 264 +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw s5, 220(sp) +; RV32-NEXT: vmul.vv v16, v8, v28 +; RV32-NEXT: vmul.vv v28, v8, v4 +; RV32-NEXT: vxor.vi v28, v28, 0 +; RV32-NEXT: vxor.vv v28, v28, v16 +; RV32-NEXT: vand.vx v16, v12, s5 +; RV32-NEXT: addi s5, sp, 256 +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw s6, 212(sp) +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v28, v28, v24 +; RV32-NEXT: vand.vx v24, v12, s6 +; RV32-NEXT: addi s6, sp, 248 +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw s8, 204(sp) +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v20, v28, v20 +; RV32-NEXT: vand.vx v28, v12, s8 +; RV32-NEXT: addi s8, sp, 240 +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s1, 196(sp) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v20, v20, v4 +; RV32-NEXT: vand.vx v4, v12, s1 +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw s7, 188(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v20, v20, v0 +; RV32-NEXT: vand.vx v0, v12, s7 +; RV32-NEXT: slli a0, s4, 11 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v20, v20, v16 +; RV32-NEXT: vand.vx v16, v12, ra +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a0, 180(sp) +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw ra, 172(sp) +; RV32-NEXT: addi s4, sp, 216 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v24, v20, v24 +; RV32-NEXT: vand.vx v20, v12, s11 +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw s11, 164(sp) +; RV32-NEXT: addi s11, sp, 208 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v28, v24, v28 +; RV32-NEXT: vand.vx v24, v12, s10 +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw s10, 156(sp) +; RV32-NEXT: addi s10, sp, 200 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v4, v28, v4 +; RV32-NEXT: vand.vx v28, v12, s9 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw s9, 148(sp) +; RV32-NEXT: addi s9, sp, 192 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v4, v4, v0 +; RV32-NEXT: vand.vx v0, v12, a0 +; RV32-NEXT: addi ra, sp, 184 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v0, v4, v0 +; RV32-NEXT: vand.vx v4, v12, s0 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw s0, 140(sp) +; RV32-NEXT: addi s1, sp, 176 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v0, v0, v16 +; RV32-NEXT: vand.vx v16, v12, t6 +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw t6, 132(sp) +; RV32-NEXT: addi s0, sp, 168 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: 
vxor.vv v0, v0, v20 +; RV32-NEXT: vand.vx v20, v12, t5 +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t5, 124(sp) +; RV32-NEXT: addi t6, sp, 160 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: vand.vx v24, v12, t4 +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t4, 116(sp) +; RV32-NEXT: addi t5, sp, 152 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v0, v0, v28 +; RV32-NEXT: vand.vx v28, v12, t3 +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t3, 108(sp) +; RV32-NEXT: addi t4, sp, 144 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: vand.vx v4, v12, t2 +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw t2, 100(sp) +; RV32-NEXT: addi t3, sp, 136 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v16, v0, v16 +; RV32-NEXT: vand.vx v0, v12, t1 +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw t1, 92(sp) +; RV32-NEXT: addi t2, sp, 128 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v20, v16, v20 +; RV32-NEXT: vand.vx v16, v12, t0 +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw t0, 84(sp) +; RV32-NEXT: addi t1, sp, 120 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v24, v20, v24 +; RV32-NEXT: vand.vx v20, v12, a7 +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw a7, 76(sp) +; RV32-NEXT: addi t0, sp, 112 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v24, v24, v28 +; RV32-NEXT: vand.vx v28, v12, a6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw a6, 68(sp) +; RV32-NEXT: addi a7, sp, 104 +; RV32-NEXT: vmul.vv v28, v8, v4 +; RV32-NEXT: vxor.vv v24, v24, v28 +; RV32-NEXT: vand.vx v28, v12, a5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw a5, 60(sp) +; RV32-NEXT: addi a6, sp, 96 +; RV32-NEXT: vmul.vv v28, v8, v0 +; RV32-NEXT: vxor.vv v28, v24, v28 +; RV32-NEXT: vand.vx v24, v12, a4 +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw a4, 52(sp) +; RV32-NEXT: addi a5, sp, 88 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v16, v28, v16 +; RV32-NEXT: vand.vx v28, v12, a3 +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw a3, 44(sp) +; RV32-NEXT: addi a4, sp, 80 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v16, v16, v20 +; RV32-NEXT: vand.vx v4, v12, a2 +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: addi a3, sp, 72 +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: lui a1, 262144 +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: addi a2, sp, 64 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded 
Reload +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v20, v16, v20 +; RV32-NEXT: vlse64.v v16, (s3), zero +; RV32-NEXT: addi s3, sp, 56 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v0, v20, v0 +; RV32-NEXT: vlse64.v v20, (s2), zero +; RV32-NEXT: addi s2, sp, 48 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: vlse64.v v24, (s5), zero +; RV32-NEXT: addi s5, sp, 40 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v0, v0, v28 +; RV32-NEXT: vlse64.v v28, (s6), zero +; RV32-NEXT: addi s6, sp, 32 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v4, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v4, (s8), zero +; RV32-NEXT: addi s8, sp, 24 +; RV32-NEXT: vand.vv v16, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi s7, sp, 232 +; RV32-NEXT: vlse64.v v16, (s7), zero +; RV32-NEXT: addi s7, sp, 224 +; RV32-NEXT: vlse64.v v20, (s7), zero +; RV32-NEXT: vlse64.v v24, (s4), zero +; RV32-NEXT: vlse64.v v28, (s11), zero +; RV32-NEXT: vand.vv v16, v12, v16 +; RV32-NEXT: csrr s4, vlenb +; RV32-NEXT: slli s4, s4, 4 +; RV32-NEXT: add s4, sp, s4 +; 
RV32-NEXT: addi s4, s4, 288 +; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr s4, vlenb +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: mv s7, s4 +; RV32-NEXT: slli s4, s4, 1 +; RV32-NEXT: add s7, s7, s4 +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: add s4, s4, s7 +; RV32-NEXT: add s4, sp, s4 +; RV32-NEXT: addi s4, s4, 288 +; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr s4, vlenb +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: mv s7, s4 +; RV32-NEXT: slli s4, s4, 4 +; RV32-NEXT: add s4, s4, s7 +; RV32-NEXT: add s4, sp, s4 +; RV32-NEXT: addi s4, s4, 288 +; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr s4, vlenb +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: mv s7, s4 +; RV32-NEXT: slli s4, s4, 1 +; RV32-NEXT: add s7, s7, s4 +; RV32-NEXT: slli s4, s4, 1 +; RV32-NEXT: add s7, s7, s4 +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: add s4, s4, s7 +; RV32-NEXT: add s4, sp, s4 +; RV32-NEXT: addi s4, s4, 288 +; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v20, (s10), zero +; RV32-NEXT: vlse64.v v24, (s9), zero +; RV32-NEXT: vlse64.v v28, (ra), zero +; RV32-NEXT: vlse64.v v4, (s1), zero +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 2 +; RV32-NEXT: mv s4, s1 +; RV32-NEXT: slli s1, s1, 1 +; RV32-NEXT: add s1, s1, s4 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 3 +; RV32-NEXT: mv s4, s1 +; RV32-NEXT: slli s1, s1, 2 +; RV32-NEXT: add s1, s1, s4 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 6 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v4 +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 3 +; RV32-NEXT: mv s4, s1 +; RV32-NEXT: slli s1, s1, 1 +; RV32-NEXT: add s4, s4, s1 +; RV32-NEXT: slli s1, s1, 2 +; RV32-NEXT: add s1, s1, s4 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s0), zero +; RV32-NEXT: vlse64.v v28, (t6), zero +; RV32-NEXT: vlse64.v v4, (t5), zero +; RV32-NEXT: vlse64.v v0, (t4), zero +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 3 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: mv t5, t4 +; RV32-NEXT: slli t4, t4, 3 +; RV32-NEXT: add t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v4 +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: mv t5, t4 +; RV32-NEXT: slli t4, t4, 1 +; RV32-NEXT: add t5, t5, t4 +; RV32-NEXT: slli t4, t4, 1 +; RV32-NEXT: add t5, t5, t4 +; RV32-NEXT: slli t4, t4, 1 +; RV32-NEXT: add t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs4r.v v16, (t4) # vscale x 
32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v0 +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: mv t5, t4 +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: add t5, t5, t4 +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: add t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v28, (t3), zero +; RV32-NEXT: vlse64.v v4, (t2), zero +; RV32-NEXT: vlse64.v v0, (t1), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: vand.vv v20, v12, v28 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 2 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v20, v12, v4 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 5 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v20, v12, v0 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 3 +; RV32-NEXT: mv t1, t0 +; RV32-NEXT: slli t0, t0, 1 +; RV32-NEXT: add t1, t1, t0 +; RV32-NEXT: slli t0, t0, 1 +; RV32-NEXT: add t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v16 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 4 +; RV32-NEXT: mv t1, t0 +; RV32-NEXT: slli t0, t0, 2 +; RV32-NEXT: add t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs4r.v v16, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v16, (a7), zero +; RV32-NEXT: vlse64.v v0, (a6), zero +; RV32-NEXT: vlse64.v v20, (a5), zero +; RV32-NEXT: vlse64.v v24, (a4), zero +; RV32-NEXT: vand.vv v4, v12, v16 +; RV32-NEXT: vand.vv v16, v12, v0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: mv a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a5, a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 288 +; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: mv a5, a4 +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: add a5, a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 288 +; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: mv a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a5, a5, a4 +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 288 +; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vlse64.v v20, (a2), zero +; RV32-NEXT: vlse64.v v24, (s3), zero +; RV32-NEXT: vlse64.v v28, (s2), zero +; RV32-NEXT: vand.vv v0, v12, v16 +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 288 +; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, a2, a3 +; 
RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 288 +; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 288 +; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v16, (s5), zero +; RV32-NEXT: vlse64.v v20, (s6), zero +; RV32-NEXT: vlse64.v v24, (s8), zero +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vand.vv v16, v12, v16 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v24, v12, v24 +; RV32-NEXT: vand.vv v20, v12, v28 +; RV32-NEXT: vand.vx v12, v12, a1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v12, v16, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, 
vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: 
add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vmul.vv v16, v8, v4 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vmul.vv v16, v8, v0 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; 
RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vmul.vv v16, v8, v24 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vmul.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v12, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmul_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vand.vi v16, v12, 2 +; RV64-NEXT: vand.vi v20, v12, 1 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v20, v16 +; RV64-NEXT: vand.vi v20, v12, 4 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vi v20, v12, 8 +; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a0 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a0 +; RV64-NEXT: li a0, 256 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: li a1, 512 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: 
vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a2 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 128 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 256 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 512 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 1024 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 2048 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 8192 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 16384 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 65536 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 131072 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 262144 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 37 +; 
RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 41 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 42 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 43 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 44 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 45 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 46 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 47 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 50 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 52 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 54 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 55 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 56 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 57 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 58 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 59 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 60 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 61 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, 
a0 +; RV64-NEXT: vand.vx v12, v12, a1 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vmul.vv v8, v8, v12 +; RV64-NEXT: vxor.vv v8, v16, v8 +; RV64-NEXT: ret + %a = call @llvm.clmul.nxv4i64( %x, %y) + ret %a +} + +define @clmul_nxv8i64( %x, %y) nounwind { +; RV32-LABEL: clmul_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: li s5, 1 +; RV32-NEXT: li a3, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li s10, 8 +; RV32-NEXT: li a0, 16 +; RV32-NEXT: li t6, 32 +; RV32-NEXT: li s1, 64 +; RV32-NEXT: li s3, 128 +; RV32-NEXT: li s7, 256 +; RV32-NEXT: li s4, 512 +; RV32-NEXT: li s8, 1024 +; RV32-NEXT: lui ra, 1 +; RV32-NEXT: lui s11, 2 +; RV32-NEXT: lui s9, 4 +; RV32-NEXT: lui s6, 8 +; RV32-NEXT: lui s2, 16 +; RV32-NEXT: lui s0, 32 +; RV32-NEXT: lui t5, 64 +; RV32-NEXT: lui t4, 128 +; RV32-NEXT: lui t3, 256 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: lui t1, 1024 +; RV32-NEXT: lui t0, 2048 +; RV32-NEXT: lui a7, 4096 +; RV32-NEXT: lui a6, 8192 +; RV32-NEXT: lui a5, 16384 +; RV32-NEXT: lui a4, 32768 +; RV32-NEXT: sw a1, 272(sp) +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw zero, 264(sp) +; RV32-NEXT: sw s5, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a3, 260(sp) +; RV32-NEXT: lui a3, 65536 +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw a2, 252(sp) +; RV32-NEXT: lui a2, 131072 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s10, 244(sp) +; RV32-NEXT: vsetvli s10, zero, e64, m8, ta, ma +; RV32-NEXT: vand.vi v24, v16, 2 +; RV32-NEXT: vand.vi v0, v16, 1 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vi v0, v0, 0 +; RV32-NEXT: vxor.vv v24, v0, v24 +; RV32-NEXT: vand.vi v0, v16, 4 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vi v0, v16, 8 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw a0, 236(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, a0 +; RV32-NEXT: addi s10, sp, 272 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw t6, 228(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, t6 +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw s1, 220(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s1 +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw s3, 212(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s3 +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw s7, 204(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s7 +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s4, 
196(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s4 +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw s8, 188(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s8 +; RV32-NEXT: slli s5, s5, 11 +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw s5, 180(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s5 +; RV32-NEXT: addi s5, sp, 216 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, ra +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw ra, 172(sp) +; RV32-NEXT: addi ra, sp, 208 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s11 +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw s11, 164(sp) +; RV32-NEXT: addi s11, sp, 200 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s9 +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw s9, 156(sp) +; RV32-NEXT: addi s9, sp, 192 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s6 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw s6, 148(sp) +; RV32-NEXT: addi s6, sp, 184 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s2 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw s2, 140(sp) +; RV32-NEXT: addi s3, sp, 176 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, s0 +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s0, 132(sp) +; RV32-NEXT: addi s4, sp, 168 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, t5 +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t5, 124(sp) +; RV32-NEXT: addi s2, sp, 160 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, t4 +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t4, 116(sp) +; RV32-NEXT: addi s1, sp, 152 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, t3 +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t3, 108(sp) +; RV32-NEXT: addi t6, sp, 144 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, t2 +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw t2, 100(sp) +; RV32-NEXT: addi s0, sp, 136 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, t1 +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw t1, 92(sp) +; RV32-NEXT: addi t5, sp, 128 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, t0 +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw t0, 84(sp) +; RV32-NEXT: addi t4, sp, 120 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, a7 +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw a7, 76(sp) +; RV32-NEXT: addi t2, sp, 112 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, a6 +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw a6, 68(sp) +; RV32-NEXT: addi t3, sp, 104 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, a5 +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw a5, 60(sp) +; RV32-NEXT: addi t1, sp, 96 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, a4 +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw a4, 52(sp) +; RV32-NEXT: 
addi t0, sp, 88 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, a3 +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw a3, 44(sp) +; RV32-NEXT: addi a7, sp, 80 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vand.vx v0, v16, a2 +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: lui a0, 262144 +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: sw t2, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s10), zero +; RV32-NEXT: addi a6, sp, 72 +; RV32-NEXT: addi a5, sp, 64 +; RV32-NEXT: addi a4, sp, 56 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 8 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: addi s10, sp, 40 +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: addi s7, sp, 264 +; RV32-NEXT: vlse64.v v24, (s7), zero +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi s7, sp, 256 +; RV32-NEXT: vlse64.v v0, (s7), zero +; RV32-NEXT: addi s7, sp, 248 +; RV32-NEXT: vlse64.v v24, (s7), zero +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi s7, sp, 240 +; RV32-NEXT: vlse64.v v24, (s7), zero +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 5 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 
64-byte Folded Spill +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: mv s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add s7, s7, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, s7 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi s7, sp, 16 +; RV32-NEXT: addi s8, sp, 232 +; RV32-NEXT: vlse64.v v24, (s8), zero +; RV32-NEXT: csrr s8, vlenb +; RV32-NEXT: slli s8, s8, 4 +; RV32-NEXT: mv t2, s8 +; RV32-NEXT: slli s8, s8, 2 +; RV32-NEXT: add t2, t2, s8 +; RV32-NEXT: slli s8, s8, 1 +; RV32-NEXT: add s8, s8, t2 +; RV32-NEXT: lw t2, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add s8, sp, s8 +; RV32-NEXT: addi s8, s8, 288 +; RV32-NEXT: vs8r.v v24, (s8) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi s8, sp, 224 +; RV32-NEXT: vlse64.v v0, (s8), zero +; RV32-NEXT: vlse64.v v24, (s5), zero +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 2 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (ra), zero +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 4 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: 
slli s5, s5, 2 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 4 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 2 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 2 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 2 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 2 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v0, (s11), zero +; RV32-NEXT: vlse64.v v24, (s9), zero +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: mv s8, s5 +; RV32-NEXT: slli s5, s5, 3 +; RV32-NEXT: add s8, s8, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s8 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s6), zero +; RV32-NEXT: csrr s5, vlenb +; RV32-NEXT: slli s5, s5, 4 +; RV32-NEXT: mv s6, s5 +; RV32-NEXT: slli s5, s5, 2 +; RV32-NEXT: add s6, s6, s5 +; RV32-NEXT: slli s5, s5, 1 +; RV32-NEXT: add s5, s5, s6 +; RV32-NEXT: add s5, sp, s5 +; RV32-NEXT: addi s5, s5, 288 +; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s3), zero +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: slli s3, s3, 6 +; RV32-NEXT: mv s5, s3 +; RV32-NEXT: slli s3, s3, 1 +; RV32-NEXT: add s3, s3, s5 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v0, v16, v0 +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: 
slli s3, s3, 4 +; RV32-NEXT: mv s5, s3 +; RV32-NEXT: slli s3, s3, 1 +; RV32-NEXT: add s3, s3, s5 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vs8r.v v0, (s3) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: slli s3, s3, 3 +; RV32-NEXT: mv s5, s3 +; RV32-NEXT: slli s3, s3, 3 +; RV32-NEXT: add s5, s5, s3 +; RV32-NEXT: slli s3, s3, 1 +; RV32-NEXT: add s3, s3, s5 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: slli s3, s3, 3 +; RV32-NEXT: mv s5, s3 +; RV32-NEXT: slli s3, s3, 2 +; RV32-NEXT: add s5, s5, s3 +; RV32-NEXT: slli s3, s3, 1 +; RV32-NEXT: add s3, s3, s5 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: slli s3, s3, 4 +; RV32-NEXT: mv s5, s3 +; RV32-NEXT: slli s3, s3, 2 +; RV32-NEXT: add s5, s5, s3 +; RV32-NEXT: slli s3, s3, 1 +; RV32-NEXT: add s3, s3, s5 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: slli s3, s3, 5 +; RV32-NEXT: mv s5, s3 +; RV32-NEXT: slli s3, s3, 2 +; RV32-NEXT: add s3, s3, s5 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: slli s3, s3, 6 +; RV32-NEXT: mv s5, s3 +; RV32-NEXT: slli s3, s3, 1 +; RV32-NEXT: add s3, s3, s5 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: slli s3, s3, 4 +; RV32-NEXT: mv s5, s3 +; RV32-NEXT: slli s3, s3, 2 +; RV32-NEXT: add s5, s5, s3 +; RV32-NEXT: slli s3, s3, 1 +; RV32-NEXT: add s3, s3, s5 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s4), zero +; RV32-NEXT: csrr s3, vlenb +; RV32-NEXT: slli s3, s3, 6 +; RV32-NEXT: mv s4, s3 +; RV32-NEXT: slli s3, s3, 1 +; RV32-NEXT: add s3, s3, s4 +; RV32-NEXT: add s3, sp, s3 +; RV32-NEXT: addi s3, s3, 288 +; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v0, (s2), zero +; RV32-NEXT: vlse64.v v24, (s1), zero +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 3 +; RV32-NEXT: mv s2, s1 +; RV32-NEXT: slli s1, s1, 3 +; RV32-NEXT: add s2, s2, s1 +; RV32-NEXT: slli s1, s1, 1 +; RV32-NEXT: add s1, s1, s2 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs8r.v v24, (s1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (t6), zero +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: mv s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add s1, s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add s1, s1, t6 +; RV32-NEXT: slli t6, t6, 2 +; RV32-NEXT: add t6, t6, s1 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 6 +; RV32-NEXT: mv s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add t6, t6, s1 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, 
v24 +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: mv s1, t6 +; RV32-NEXT: slli t6, t6, 2 +; RV32-NEXT: add t6, t6, s1 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 5 +; RV32-NEXT: mv s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add t6, t6, s1 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: mv s1, t6 +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: add s1, s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add t6, t6, s1 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: mv s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add s1, s1, t6 +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: add t6, t6, s1 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: mv s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add s1, s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add s1, s1, t6 +; RV32-NEXT: slli t6, t6, 2 +; RV32-NEXT: add t6, t6, s1 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: mv s1, t6 +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: add s1, s1, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add t6, t6, s1 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s0), zero +; RV32-NEXT: csrr t6, vlenb +; RV32-NEXT: slli t6, t6, 3 +; RV32-NEXT: mv s0, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add s0, s0, t6 +; RV32-NEXT: slli t6, t6, 1 +; RV32-NEXT: add s0, s0, t6 +; RV32-NEXT: slli t6, t6, 2 +; RV32-NEXT: add t6, t6, s0 +; RV32-NEXT: add t6, sp, t6 +; RV32-NEXT: addi t6, t6, 288 +; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v0, (t5), zero +; RV32-NEXT: vlse64.v v24, (t4), zero +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 6 +; RV32-NEXT: mv t5, t4 +; RV32-NEXT: slli t4, t4, 1 +; RV32-NEXT: add t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs8r.v v24, (t4) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (t2), zero +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: mv t4, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t4, t4, t2 +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: add t2, t2, t4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: mv t4, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t4, t4, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t4, t4, t2 +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: add t2, t2, t4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr t2, vlenb +; 
RV32-NEXT: slli t2, t2, 5 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: mv t4, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t4, t4, t2 +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: add t2, t2, t4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 6 +; RV32-NEXT: mv t4, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, t4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: mv t4, t2 +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: add t2, t2, t4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: mv t4, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t4, t4, t2 +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: add t2, t2, t4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 6 +; RV32-NEXT: mv t4, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, t4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (t3), zero +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t3, t3, t2 +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v0, (t1), zero +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 3 +; RV32-NEXT: mv t1, t0 +; RV32-NEXT: slli t0, t0, 1 +; RV32-NEXT: add t1, t1, t0 +; RV32-NEXT: slli t0, t0, 1 +; RV32-NEXT: add t1, t1, t0 +; RV32-NEXT: slli t0, t0, 2 +; RV32-NEXT: add t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs8r.v v24, (t0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a7), zero +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 7 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 4 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add t0, t0, a7 +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 3 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 4 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; 
RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 3 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add t0, t0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add t0, t0, a7 +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 3 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 4 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 7 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 3 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add t0, t0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add t0, t0, a7 +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a6), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v0, (a5), zero +; RV32-NEXT: vlse64.v v24, (a4), zero +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: mv a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a5, a5, a4 +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 288 +; RV32-NEXT: vs8r.v v24, (a4) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a4, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a4, a4, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a4, a4, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 7 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a4, a1 +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: mv a4, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a4, a4, a1 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 7 +; RV32-NEXT: add a1, sp, a1 +; 
RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a4, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a4, a4, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a4, a4, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: mv a4, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a4, a4, a1 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s10), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v0, (a3), zero +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a2, a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a2, a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s7), zero +; RV32-NEXT: addi a1, sp, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a2, a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a2, a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a2, a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a2, a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a1, sp, 288 +; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v0, v16, v24 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v16, v24, v16 +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; 
RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 
+; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 
+; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: vmul.vv v8, v8, v0 +; RV32-NEXT: vxor.vv v8, v16, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmul_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64-NEXT: vand.vi v24, v16, 2 +; RV64-NEXT: vand.vi v0, v16, 1 +; RV64-NEXT: vmul.vv v24, v8, v24 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v0, v24 +; RV64-NEXT: vand.vi v0, v16, 4 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vi v0, v16, 8 +; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a0 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a0 +; RV64-NEXT: li a0, 256 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: li a1, 512 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a2 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, 
v16, a1 +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 128 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 256 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 512 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 1024 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 2048 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 8192 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 16384 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 65536 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 131072 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: lui a1, 262144 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 37 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 41 +; 
RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 42 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 43 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 44 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 45 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 46 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 47 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 50 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 52 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 54 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 55 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 56 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 57 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 58 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 59 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 60 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: slli a1, a0, 61 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a1 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v0, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vxor.vv v8, v24, v8 +; RV64-NEXT: ret + %a = call @llvm.clmul.nxv8i64( %x, %y) + ret %a +} + +define @clmulr_nxv1i32( %x, %y) nounwind { +; CHECK-LABEL: clmulr_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a4, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vsll.vi v11, v8, 24 +; CHECK-NEXT: lui 
a0, 61681 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: lui a5, 349525 +; CHECK-NEXT: li a6, 16 +; CHECK-NEXT: addi a3, a4, -256 +; CHECK-NEXT: addi a2, a0, -241 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: addi a0, a5, 1365 +; CHECK-NEXT: vand.vx v9, v9, a3 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v11, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vand.vx v9, v8, a6 +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: vand.vx v10, v8, a5 +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vand.vx v11, v8, a5 +; CHECK-NEXT: li a5, 128 +; CHECK-NEXT: vand.vx v12, v8, a5 +; CHECK-NEXT: li a5, 256 +; CHECK-NEXT: vand.vx v13, v8, a5 +; CHECK-NEXT: li a5, 512 +; CHECK-NEXT: vand.vx v14, v8, a5 +; CHECK-NEXT: li a5, 1024 +; CHECK-NEXT: vand.vx v15, v8, a5 +; CHECK-NEXT: li a5, 1 +; CHECK-NEXT: slli a5, a5, 11 +; CHECK-NEXT: vand.vx v16, v8, a5 +; CHECK-NEXT: lui a5, 1 +; CHECK-NEXT: vand.vx v17, v8, a5 +; CHECK-NEXT: lui a5, 2 +; CHECK-NEXT: vand.vx v18, v8, a5 +; CHECK-NEXT: lui a5, 4 +; CHECK-NEXT: vand.vx v19, v8, a5 +; CHECK-NEXT: lui a5, 8 +; CHECK-NEXT: vand.vx v20, v8, a5 +; CHECK-NEXT: lui a5, 32 +; CHECK-NEXT: vand.vx v21, v8, a4 +; CHECK-NEXT: lui a4, 64 +; CHECK-NEXT: vand.vx v22, v8, a5 +; CHECK-NEXT: lui a5, 128 +; CHECK-NEXT: vand.vx v23, v8, a4 +; CHECK-NEXT: lui a4, 256 +; CHECK-NEXT: vand.vx v24, v8, a5 +; CHECK-NEXT: lui a5, 512 +; CHECK-NEXT: vand.vx v25, v8, a4 +; CHECK-NEXT: lui a4, 1024 +; CHECK-NEXT: vand.vx v26, v8, a5 +; CHECK-NEXT: lui a5, 2048 +; CHECK-NEXT: vand.vx v27, v8, a4 +; CHECK-NEXT: lui a4, 4096 +; CHECK-NEXT: vand.vx v28, v8, a5 +; CHECK-NEXT: lui a5, 8192 +; CHECK-NEXT: vand.vx v29, v8, a4 +; CHECK-NEXT: lui a4, 16384 +; CHECK-NEXT: vand.vx v30, v8, a5 +; CHECK-NEXT: lui a5, 32768 +; CHECK-NEXT: vand.vx v31, v8, a4 +; CHECK-NEXT: lui a4, 65536 +; CHECK-NEXT: vand.vx v7, v8, a5 +; CHECK-NEXT: lui a5, 131072 +; CHECK-NEXT: vand.vx v6, v8, a4 +; CHECK-NEXT: lui a4, 262144 +; CHECK-NEXT: vand.vx v5, v8, a5 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: vand.vi v4, v8, 2 +; CHECK-NEXT: vand.vi v3, v8, 1 +; CHECK-NEXT: vand.vi v2, v8, 4 +; CHECK-NEXT: vand.vi v1, v8, 8 +; CHECK-NEXT: vand.vx v0, v8, a4 +; CHECK-NEXT: vmul.vv v4, v8, v4 +; CHECK-NEXT: vmul.vv v3, v8, v3 +; CHECK-NEXT: vmul.vv v2, v8, v2 +; CHECK-NEXT: vmul.vv v1, v8, v1 +; CHECK-NEXT: vmul.vv v9, v8, v9 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vmul.vv v13, v8, v13 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vmul.vv v15, v8, v15 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vmul.vv v17, v8, v17 +; CHECK-NEXT: vmul.vv v18, v8, v18 +; CHECK-NEXT: vmul.vv v19, v8, v19 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vmul.vv v21, v8, v21 +; CHECK-NEXT: vmul.vv v22, v8, v22 +; CHECK-NEXT: vmul.vv v23, v8, v23 +; CHECK-NEXT: vmul.vv v24, v8, v24 +; CHECK-NEXT: vmul.vv v25, v8, v25 +; CHECK-NEXT: vmul.vv v26, v8, v26 +; CHECK-NEXT: vmul.vv v27, v8, v27 +; 
CHECK-NEXT: vmul.vv v28, v8, v28 +; CHECK-NEXT: vmul.vv v29, v8, v29 +; CHECK-NEXT: vmul.vv v30, v8, v30 +; CHECK-NEXT: vmul.vv v31, v8, v31 +; CHECK-NEXT: vmul.vv v7, v8, v7 +; CHECK-NEXT: vmul.vv v6, v8, v6 +; CHECK-NEXT: vmul.vv v5, v8, v5 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs1r.v v0, (a4) # vscale x 8-byte Folded Spill +; CHECK-NEXT: vand.vx v0, v8, a5 +; CHECK-NEXT: vmul.vv v8, v8, v0 +; CHECK-NEXT: vxor.vv v4, v3, v4 +; CHECK-NEXT: vxor.vv v4, v4, v2 +; CHECK-NEXT: vxor.vv v4, v4, v1 +; CHECK-NEXT: vxor.vv v9, v4, v9 +; CHECK-NEXT: vxor.vv v9, v9, v10 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vxor.vv v9, v9, v12 +; CHECK-NEXT: vxor.vv v10, v9, v13 +; CHECK-NEXT: vxor.vv v10, v10, v14 +; CHECK-NEXT: vxor.vv v10, v10, v15 +; CHECK-NEXT: vxor.vv v10, v10, v16 +; CHECK-NEXT: vxor.vv v10, v10, v17 +; CHECK-NEXT: vxor.vv v10, v10, v18 +; CHECK-NEXT: vxor.vv v10, v10, v19 +; CHECK-NEXT: vxor.vv v10, v10, v20 +; CHECK-NEXT: vxor.vv v10, v10, v21 +; CHECK-NEXT: vxor.vv v10, v10, v22 +; CHECK-NEXT: vxor.vv v10, v10, v23 +; CHECK-NEXT: vxor.vv v10, v10, v24 +; CHECK-NEXT: vxor.vv v10, v10, v25 +; CHECK-NEXT: vxor.vv v10, v10, v26 +; CHECK-NEXT: vxor.vv v10, v10, v27 +; CHECK-NEXT: vxor.vv v10, v10, v28 +; CHECK-NEXT: vsll.vi v9, v9, 24 +; CHECK-NEXT: vxor.vv v11, v10, v29 +; CHECK-NEXT: vxor.vv v11, v11, v30 +; CHECK-NEXT: vand.vx v12, v10, a3 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vor.vv v9, v9, v12 +; CHECK-NEXT: vxor.vv v11, v11, v31 +; CHECK-NEXT: vxor.vv v11, v11, v7 +; CHECK-NEXT: vxor.vv v11, v11, v6 +; CHECK-NEXT: vxor.vv v11, v11, v5 +; CHECK-NEXT: vsrl.vi v10, v10, 8 +; CHECK-NEXT: vand.vx v10, v10, a3 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl1r.v v12, (a3) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vxor.vv v11, v11, v12 +; CHECK-NEXT: vxor.vv v8, v11, v8 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %a = call @llvm.clmulr.nxv1i32( %x, %y) + ret %a +} + +define @clmulr_nxv2i32( %x, %y) nounwind { +; CHECK-LABEL: clmulr_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a4, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vsll.vi v11, v8, 24 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: lui a5, 349525 +; CHECK-NEXT: li a6, 16 +; CHECK-NEXT: addi a3, a4, -256 +; CHECK-NEXT: addi a2, a0, -241 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: addi a0, a5, 1365 +; CHECK-NEXT: vand.vx v9, v9, a3 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v11, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; 
CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vand.vx v9, v8, a6 +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: vand.vx v10, v8, a5 +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vand.vx v11, v8, a5 +; CHECK-NEXT: li a5, 128 +; CHECK-NEXT: vand.vx v12, v8, a5 +; CHECK-NEXT: li a5, 256 +; CHECK-NEXT: vand.vx v13, v8, a5 +; CHECK-NEXT: li a5, 512 +; CHECK-NEXT: vand.vx v14, v8, a5 +; CHECK-NEXT: li a5, 1024 +; CHECK-NEXT: vand.vx v15, v8, a5 +; CHECK-NEXT: li a5, 1 +; CHECK-NEXT: slli a5, a5, 11 +; CHECK-NEXT: vand.vx v16, v8, a5 +; CHECK-NEXT: lui a5, 1 +; CHECK-NEXT: vand.vx v17, v8, a5 +; CHECK-NEXT: lui a5, 2 +; CHECK-NEXT: vand.vx v18, v8, a5 +; CHECK-NEXT: lui a5, 4 +; CHECK-NEXT: vand.vx v19, v8, a5 +; CHECK-NEXT: lui a5, 8 +; CHECK-NEXT: vand.vx v20, v8, a5 +; CHECK-NEXT: lui a5, 32 +; CHECK-NEXT: vand.vx v21, v8, a4 +; CHECK-NEXT: lui a4, 64 +; CHECK-NEXT: vand.vx v22, v8, a5 +; CHECK-NEXT: lui a5, 128 +; CHECK-NEXT: vand.vx v23, v8, a4 +; CHECK-NEXT: lui a4, 256 +; CHECK-NEXT: vand.vx v24, v8, a5 +; CHECK-NEXT: lui a5, 512 +; CHECK-NEXT: vand.vx v25, v8, a4 +; CHECK-NEXT: lui a4, 1024 +; CHECK-NEXT: vand.vx v26, v8, a5 +; CHECK-NEXT: lui a5, 2048 +; CHECK-NEXT: vand.vx v27, v8, a4 +; CHECK-NEXT: lui a4, 4096 +; CHECK-NEXT: vand.vx v28, v8, a5 +; CHECK-NEXT: lui a5, 8192 +; CHECK-NEXT: vand.vx v29, v8, a4 +; CHECK-NEXT: lui a4, 16384 +; CHECK-NEXT: vand.vx v30, v8, a5 +; CHECK-NEXT: lui a5, 32768 +; CHECK-NEXT: vand.vx v31, v8, a4 +; CHECK-NEXT: lui a4, 65536 +; CHECK-NEXT: vand.vx v7, v8, a5 +; CHECK-NEXT: lui a5, 131072 +; CHECK-NEXT: vand.vx v6, v8, a4 +; CHECK-NEXT: lui a4, 262144 +; CHECK-NEXT: vand.vx v5, v8, a5 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: vand.vi v4, v8, 2 +; CHECK-NEXT: vand.vi v3, v8, 1 +; CHECK-NEXT: vand.vi v2, v8, 4 +; CHECK-NEXT: vand.vi v1, v8, 8 +; CHECK-NEXT: vand.vx v0, v8, a4 +; CHECK-NEXT: vmul.vv v4, v8, v4 +; CHECK-NEXT: vmul.vv v3, v8, v3 +; CHECK-NEXT: vmul.vv v2, v8, v2 +; CHECK-NEXT: vmul.vv v1, v8, v1 +; CHECK-NEXT: vmul.vv v9, v8, v9 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vmul.vv v13, v8, v13 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vmul.vv v15, v8, v15 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vmul.vv v17, v8, v17 +; CHECK-NEXT: vmul.vv v18, v8, v18 +; CHECK-NEXT: vmul.vv v19, v8, v19 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vmul.vv v21, v8, v21 +; CHECK-NEXT: vmul.vv v22, v8, v22 +; CHECK-NEXT: vmul.vv v23, v8, v23 +; CHECK-NEXT: vmul.vv v24, v8, v24 +; CHECK-NEXT: vmul.vv v25, v8, v25 +; CHECK-NEXT: vmul.vv v26, v8, v26 +; CHECK-NEXT: vmul.vv v27, v8, v27 +; CHECK-NEXT: vmul.vv v28, v8, v28 +; CHECK-NEXT: vmul.vv v29, v8, v29 +; CHECK-NEXT: vmul.vv v30, v8, v30 +; CHECK-NEXT: vmul.vv v31, v8, v31 +; CHECK-NEXT: vmul.vv v7, v8, v7 +; CHECK-NEXT: vmul.vv v6, v8, v6 +; CHECK-NEXT: vmul.vv v5, v8, v5 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs1r.v v0, (a4) # vscale x 8-byte Folded Spill +; CHECK-NEXT: vand.vx v0, v8, a5 +; CHECK-NEXT: vmul.vv v8, v8, v0 +; CHECK-NEXT: vxor.vv v4, v3, v4 +; CHECK-NEXT: vxor.vv v4, v4, v2 +; CHECK-NEXT: vxor.vv 
v4, v4, v1 +; CHECK-NEXT: vxor.vv v9, v4, v9 +; CHECK-NEXT: vxor.vv v9, v9, v10 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vxor.vv v9, v9, v12 +; CHECK-NEXT: vxor.vv v10, v9, v13 +; CHECK-NEXT: vxor.vv v10, v10, v14 +; CHECK-NEXT: vxor.vv v10, v10, v15 +; CHECK-NEXT: vxor.vv v10, v10, v16 +; CHECK-NEXT: vxor.vv v10, v10, v17 +; CHECK-NEXT: vxor.vv v10, v10, v18 +; CHECK-NEXT: vxor.vv v10, v10, v19 +; CHECK-NEXT: vxor.vv v10, v10, v20 +; CHECK-NEXT: vxor.vv v10, v10, v21 +; CHECK-NEXT: vxor.vv v10, v10, v22 +; CHECK-NEXT: vxor.vv v10, v10, v23 +; CHECK-NEXT: vxor.vv v10, v10, v24 +; CHECK-NEXT: vxor.vv v10, v10, v25 +; CHECK-NEXT: vxor.vv v10, v10, v26 +; CHECK-NEXT: vxor.vv v10, v10, v27 +; CHECK-NEXT: vxor.vv v10, v10, v28 +; CHECK-NEXT: vsll.vi v9, v9, 24 +; CHECK-NEXT: vxor.vv v11, v10, v29 +; CHECK-NEXT: vxor.vv v11, v11, v30 +; CHECK-NEXT: vand.vx v12, v10, a3 +; CHECK-NEXT: vsll.vi v12, v12, 8 +; CHECK-NEXT: vor.vv v9, v9, v12 +; CHECK-NEXT: vxor.vv v11, v11, v31 +; CHECK-NEXT: vxor.vv v11, v11, v7 +; CHECK-NEXT: vxor.vv v11, v11, v6 +; CHECK-NEXT: vxor.vv v11, v11, v5 +; CHECK-NEXT: vsrl.vi v10, v10, 8 +; CHECK-NEXT: vand.vx v10, v10, a3 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vl1r.v v12, (a3) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vxor.vv v11, v11, v12 +; CHECK-NEXT: vxor.vv v8, v11, v8 +; CHECK-NEXT: vsrl.vi v8, v8, 24 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %a = call @llvm.clmulr.nxv2i32( %x, %y) + ret %a +} + +define @clmulr_nxv4i32( %x, %y) nounwind { +; RV32-LABEL: clmulr_nxv4i32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -64 +; RV32-NEXT: sw s0, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 52(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 48(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: lui a0, 16 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vi v14, v8, 24 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: lui s6, 349525 +; RV32-NEXT: li t2, 16 +; RV32-NEXT: li t5, 32 +; RV32-NEXT: li s2, 64 +; RV32-NEXT: li s5, 128 +; RV32-NEXT: li s4, 256 +; RV32-NEXT: li s3, 512 +; RV32-NEXT: li s1, 1024 +; RV32-NEXT: li s0, 1 +; RV32-NEXT: lui t6, 1 +; RV32-NEXT: lui t4, 2 +; RV32-NEXT: lui t3, 4 +; RV32-NEXT: lui a5, 8 +; RV32-NEXT: lui a6, 32 +; RV32-NEXT: lui a7, 64 +; RV32-NEXT: lui t0, 128 +; RV32-NEXT: lui t1, 256 +; RV32-NEXT: addi a4, a0, -256 +; RV32-NEXT: addi a3, a1, -241 +; RV32-NEXT: addi a2, a2, 
819 +; RV32-NEXT: addi a1, s6, 1365 +; RV32-NEXT: vand.vx v10, v10, a4 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v14, v8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v10, v10, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vand.vx v10, v8, t2 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: vand.vx v12, v8, t5 +; RV32-NEXT: lui t5, 1024 +; RV32-NEXT: vand.vx v14, v8, s2 +; RV32-NEXT: lui s2, 2048 +; RV32-NEXT: vand.vx v16, v8, s5 +; RV32-NEXT: lui s5, 4096 +; RV32-NEXT: vand.vx v26, v8, s4 +; RV32-NEXT: lui s4, 8192 +; RV32-NEXT: vand.vx v28, v8, s3 +; RV32-NEXT: lui s3, 16384 +; RV32-NEXT: vand.vx v18, v8, s1 +; RV32-NEXT: lui s1, 32768 +; RV32-NEXT: slli s0, s0, 11 +; RV32-NEXT: vand.vx v20, v8, s0 +; RV32-NEXT: lui s0, 65536 +; RV32-NEXT: vand.vx v22, v8, t6 +; RV32-NEXT: lui t6, 131072 +; RV32-NEXT: vand.vx v24, v8, t4 +; RV32-NEXT: lui t4, 262144 +; RV32-NEXT: vand.vx v30, v8, t3 +; RV32-NEXT: lui t3, 524288 +; RV32-NEXT: vand.vi v6, v8, 2 +; RV32-NEXT: vand.vi v4, v8, 1 +; RV32-NEXT: vand.vi v2, v8, 4 +; RV32-NEXT: vand.vi v0, v8, 8 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v6, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v6, v8, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v6, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; 
RV32-NEXT: vmul.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v24 +; RV32-NEXT: csrr s6, vlenb +; RV32-NEXT: slli s6, s6, 1 +; RV32-NEXT: mv a0, s6 +; RV32-NEXT: slli s6, s6, 2 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add s6, sp, s6 +; RV32-NEXT: addi s6, s6, 32 +; RV32-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v30 +; RV32-NEXT: csrr s6, vlenb +; RV32-NEXT: slli s6, s6, 3 +; RV32-NEXT: add s6, sp, s6 +; RV32-NEXT: addi s6, s6, 32 +; RV32-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: mv s6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a5, a5, s6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 32 +; RV32-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a6 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a7 +; RV32-NEXT: 
vmul.vv v10, v8, v10 +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t0 +; RV32-NEXT: vmul.vv v6, v8, v10 +; RV32-NEXT: vand.vx v10, v8, t1 +; RV32-NEXT: vmul.vv v30, v8, v10 +; RV32-NEXT: vand.vx v10, v8, t2 +; RV32-NEXT: vmul.vv v28, v8, v10 +; RV32-NEXT: vand.vx v10, v8, t5 +; RV32-NEXT: vmul.vv v26, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s2 +; RV32-NEXT: vmul.vv v22, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s5 +; RV32-NEXT: vmul.vv v18, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s4 +; RV32-NEXT: vmul.vv v16, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s3 +; RV32-NEXT: vmul.vv v24, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s1 +; RV32-NEXT: vmul.vv v20, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s0 +; RV32-NEXT: vmul.vv v12, v8, v10 +; RV32-NEXT: vand.vx v10, v8, t6 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vand.vx v14, v8, t4 +; RV32-NEXT: vmul.vv v14, v8, v14 +; RV32-NEXT: vand.vx v0, v8, t3 +; RV32-NEXT: vmul.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v2 +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v0, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v4, v2, v4 +; RV32-NEXT: vxor.vv v6, v4, v6 +; RV32-NEXT: vxor.vv v30, v6, v30 +; RV32-NEXT: vxor.vv v28, v30, v28 +; RV32-NEXT: vxor.vv v26, v28, v26 +; RV32-NEXT: vxor.vv v22, v26, v22 +; RV32-NEXT: vsll.vi v26, v0, 24 +; RV32-NEXT: vxor.vv v18, v22, v18 +; RV32-NEXT: vxor.vv v16, v18, v16 +; RV32-NEXT: vand.vx v18, v22, a4 +; RV32-NEXT: vsll.vi v18, v18, 8 +; RV32-NEXT: vor.vv v18, v26, v18 +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: vxor.vv v16, v16, v20 +; RV32-NEXT: vxor.vv v12, v16, v12 +; RV32-NEXT: vxor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v22, 8 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vxor.vv v10, v10, v14 +; RV32-NEXT: vxor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: 
vor.vv v8, v18, v8 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v10, v10, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw s0, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 52(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 48(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_nxv4i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -96 +; RV64-NEXT: sd s0, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vsrl.vi v10, v8, 8 +; RV64-NEXT: lui a0, 16 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: vsll.vi v14, v8, 24 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui s6, 349525 +; RV64-NEXT: li t2, 16 +; RV64-NEXT: li t5, 32 +; RV64-NEXT: li s2, 64 +; RV64-NEXT: li s5, 128 +; RV64-NEXT: li s4, 256 +; RV64-NEXT: li s3, 512 +; RV64-NEXT: li s1, 1024 +; RV64-NEXT: li s0, 1 +; RV64-NEXT: lui t6, 1 +; RV64-NEXT: lui t4, 2 +; RV64-NEXT: lui t3, 4 +; RV64-NEXT: lui a5, 8 +; RV64-NEXT: lui a6, 32 +; RV64-NEXT: lui a7, 64 +; RV64-NEXT: lui t0, 128 +; RV64-NEXT: lui t1, 256 +; RV64-NEXT: addi a4, a0, -256 +; RV64-NEXT: addi a3, a1, -241 +; RV64-NEXT: addi a2, a2, 819 +; RV64-NEXT: addi a1, s6, 1365 +; RV64-NEXT: vand.vx v10, v10, a4 +; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vor.vv v10, v10, v12 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v10, v10, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vand.vx v10, v8, t2 +; RV64-NEXT: lui t2, 512 +; RV64-NEXT: vand.vx v12, v8, t5 +; RV64-NEXT: lui t5, 1024 +; RV64-NEXT: vand.vx v14, v8, s2 +; RV64-NEXT: lui s2, 2048 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: 
vand.vx v26, v8, s4 +; RV64-NEXT: lui s4, 8192 +; RV64-NEXT: vand.vx v28, v8, s3 +; RV64-NEXT: lui s3, 16384 +; RV64-NEXT: vand.vx v18, v8, s1 +; RV64-NEXT: lui s1, 32768 +; RV64-NEXT: slli s0, s0, 11 +; RV64-NEXT: vand.vx v20, v8, s0 +; RV64-NEXT: lui s0, 65536 +; RV64-NEXT: vand.vx v22, v8, t6 +; RV64-NEXT: lui t6, 131072 +; RV64-NEXT: vand.vx v24, v8, t4 +; RV64-NEXT: lui t4, 262144 +; RV64-NEXT: vand.vx v30, v8, t3 +; RV64-NEXT: lui t3, 524288 +; RV64-NEXT: vand.vi v6, v8, 2 +; RV64-NEXT: vand.vi v4, v8, 1 +; RV64-NEXT: vand.vi v2, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v6, v8, v6 +; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v14 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v26 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v28 +; RV64-NEXT: csrr 
a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v18 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v20 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v22 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v24 +; RV64-NEXT: csrr s6, vlenb +; RV64-NEXT: slli s6, s6, 1 +; RV64-NEXT: mv a0, s6 +; RV64-NEXT: slli s6, s6, 2 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s6, sp, s6 +; RV64-NEXT: addi s6, s6, 32 +; RV64-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v30 +; RV64-NEXT: csrr s6, vlenb +; RV64-NEXT: slli s6, s6, 3 +; RV64-NEXT: add s6, sp, s6 +; RV64-NEXT: addi s6, s6, 32 +; RV64-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, a5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, a6 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, a7 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, t0 +; RV64-NEXT: vmul.vv v6, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t1 +; RV64-NEXT: vmul.vv v30, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t2 +; RV64-NEXT: vmul.vv v28, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t5 +; RV64-NEXT: vmul.vv v26, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s2 +; RV64-NEXT: vmul.vv v22, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v18, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s4 +; RV64-NEXT: vmul.vv v16, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s3 +; RV64-NEXT: vmul.vv v24, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s1 +; RV64-NEXT: vmul.vv v20, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s0 +; RV64-NEXT: vmul.vv v12, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t6 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: vand.vx v14, v8, t4 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vand.vx v0, v8, t3 +; RV64-NEXT: vmul.vv v8, v8, 
v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: 
add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v4, v2, v4 +; RV64-NEXT: vxor.vv v6, v4, v6 +; RV64-NEXT: vxor.vv v30, v6, v30 +; RV64-NEXT: vxor.vv v28, v30, v28 +; RV64-NEXT: vxor.vv v26, v28, v26 +; RV64-NEXT: vxor.vv v22, v26, v22 +; RV64-NEXT: vsll.vi v26, v0, 24 +; RV64-NEXT: vxor.vv v18, v22, v18 +; RV64-NEXT: vxor.vv v16, v18, v16 +; RV64-NEXT: vand.vx v18, v22, a4 +; RV64-NEXT: vsll.vi v18, v18, 8 +; RV64-NEXT: vor.vv v18, v26, v18 +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vxor.vv v12, v16, v12 +; RV64-NEXT: vxor.vv v10, v12, v10 +; RV64-NEXT: vsrl.vi v12, v22, 8 +; RV64-NEXT: vand.vx v12, v12, a4 +; RV64-NEXT: vxor.vv v10, v10, v14 +; RV64-NEXT: vxor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v8, v8, 24 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v18, v8 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v10, v10, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld s0, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: 
ld s5, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 96 +; RV64-NEXT: ret + %a = call @llvm.clmulr.nxv4i32( %x, %y) + ret %a +} + +define @clmulr_nxv8i32( %x, %y) nounwind { +; RV32-LABEL: clmulr_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -80 +; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: lui a5, 16 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vi v20, v8, 24 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: lui ra, 349525 +; RV32-NEXT: li s9, 16 +; RV32-NEXT: li s8, 32 +; RV32-NEXT: li s6, 64 +; RV32-NEXT: li a7, 512 +; RV32-NEXT: li t0, 1024 +; RV32-NEXT: li a0, 1 +; RV32-NEXT: lui t1, 1 +; RV32-NEXT: lui t2, 2 +; RV32-NEXT: lui t3, 4 +; RV32-NEXT: lui t4, 8 +; RV32-NEXT: lui t5, 32 +; RV32-NEXT: lui t6, 64 +; RV32-NEXT: lui s0, 128 +; RV32-NEXT: lui s1, 256 +; RV32-NEXT: lui s2, 512 +; RV32-NEXT: lui s3, 1024 +; RV32-NEXT: lui s4, 2048 +; RV32-NEXT: lui s5, 4096 +; RV32-NEXT: lui s7, 8192 +; RV32-NEXT: lui s10, 16384 +; RV32-NEXT: lui s11, 32768 +; RV32-NEXT: addi a4, a5, -256 +; RV32-NEXT: addi a3, a1, -241 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: addi a1, ra, 1365 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsrl.vi v12, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v12, v12, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vand.vx v12, v8, s9 +; RV32-NEXT: lui s9, 65536 +; RV32-NEXT: vand.vx v16, v8, s8 +; RV32-NEXT: lui s8, 131072 +; RV32-NEXT: vand.vx v20, v8, s6 +; RV32-NEXT: lui s6, 262144 +; RV32-NEXT: slli ra, a0, 11 +; RV32-NEXT: vand.vi v24, v8, 2 +; RV32-NEXT: vand.vi v28, v8, 1 +; RV32-NEXT: vand.vi v4, v8, 4 +; RV32-NEXT: vand.vi v0, v8, 8 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: sw a4, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi 
a0, a0, 16 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v24, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v24, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: li a6, 128 +; RV32-NEXT: vand.vx v12, v8, a6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: mv a6, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a6, a6, a4 +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a6, 256 +; RV32-NEXT: vand.vx v12, v8, a6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv a4, a6 +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: add a6, a6, a4 +; RV32-NEXT: lw a4, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, a7 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 6 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 
32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, ra +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, a5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 5 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a6, a6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v 
v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s7 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vand.vx v16, v8, s10 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v16, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s11 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v16, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s9 +; RV32-NEXT: vmul.vv v28, v8, v16 +; RV32-NEXT: vand.vx v16, v8, s8 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vand.vx v20, v8, s6 +; RV32-NEXT: vmul.vv v4, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a0 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v0, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add 
a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add 
a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v24, v0, v24 +; RV32-NEXT: vxor.vv v12, v24, v12 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vand.vx v24, v0, a4 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v12, v12, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v12, v12, v24 +; RV32-NEXT: vxor.vv v12, v12, v28 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vi v16, v0, 8 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vxor.vv v12, v12, v4 +; RV32-NEXT: vxor.vv v12, v12, v20 +; RV32-NEXT: vsrl.vi v12, v12, 24 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsrl.vi v12, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; 
RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v12, v12, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 80 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; RV64-NEXT: vsrl.vi v12, v8, 8 +; RV64-NEXT: lui a5, 16 +; RV64-NEXT: vsrl.vi v16, v8, 24 +; RV64-NEXT: vsll.vi v20, v8, 24 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui ra, 349525 +; RV64-NEXT: li s9, 16 +; RV64-NEXT: li s8, 32 +; RV64-NEXT: li s6, 64 +; RV64-NEXT: li a7, 512 +; RV64-NEXT: li t0, 1024 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: lui t1, 1 +; RV64-NEXT: lui t2, 2 +; RV64-NEXT: lui t3, 4 +; RV64-NEXT: lui t4, 8 +; RV64-NEXT: lui t5, 32 +; RV64-NEXT: lui t6, 64 +; RV64-NEXT: lui s0, 128 +; RV64-NEXT: lui s1, 256 +; RV64-NEXT: lui s2, 512 +; RV64-NEXT: lui s3, 1024 +; RV64-NEXT: lui s4, 2048 +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: lui s7, 8192 +; RV64-NEXT: lui s10, 16384 +; RV64-NEXT: lui s11, 32768 +; RV64-NEXT: addi a4, a5, -256 +; RV64-NEXT: addi a3, a1, -241 +; RV64-NEXT: addi a2, a2, 819 +; RV64-NEXT: addi a1, ra, 1365 +; RV64-NEXT: vand.vx v12, v12, a4 +; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vor.vv v12, v12, v16 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v20, v8 +; 
RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v12, v12, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v12, v12, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vand.vx v12, v8, s9 +; RV64-NEXT: lui s9, 65536 +; RV64-NEXT: vand.vx v16, v8, s8 +; RV64-NEXT: lui s8, 131072 +; RV64-NEXT: vand.vx v20, v8, s6 +; RV64-NEXT: lui s6, 262144 +; RV64-NEXT: slli ra, a0, 11 +; RV64-NEXT: vand.vi v24, v8, 2 +; RV64-NEXT: vand.vi v28, v8, 1 +; RV64-NEXT: vand.vi v4, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v24, v8, v24 +; RV64-NEXT: sd a4, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v20 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui a0, 524288 +; RV64-NEXT: li a6, 128 +; RV64-NEXT: vand.vx v12, v8, a6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 2 +; RV64-NEXT: mv a6, 
a4 +; RV64-NEXT: slli a4, a4, 1 +; RV64-NEXT: add a6, a6, a4 +; RV64-NEXT: slli a4, a4, 3 +; RV64-NEXT: add a4, a4, a6 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill +; RV64-NEXT: li a6, 256 +; RV64-NEXT: vand.vx v12, v8, a6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: mv a4, a6 +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: add a6, a6, a4 +; RV64-NEXT: ld a4, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a7 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 4 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t0 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 6 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, ra +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 4 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t4 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t5 +; RV64-NEXT: 
vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s0 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a6, a6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s4 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: addi a5, sp, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s7 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: vand.vx v16, v8, s10 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v16, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s11 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v16, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s9 +; RV64-NEXT: vmul.vv v28, v8, v16 +; RV64-NEXT: vand.vx v16, v8, s8 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: vand.vx v20, v8, s6 +; RV64-NEXT: vmul.vv v4, v8, v20 +; RV64-NEXT: vand.vx v20, v8, a0 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; 
RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v0, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: 
vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: 
vxor.vv v0, v0, v24 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v0, v24 +; RV64-NEXT: vxor.vv v12, v24, v12 +; RV64-NEXT: vsll.vi v8, v8, 24 +; RV64-NEXT: vand.vx v24, v0, a4 +; RV64-NEXT: vsll.vi v24, v24, 8 +; RV64-NEXT: vor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: vxor.vv v12, v12, v28 +; RV64-NEXT: vxor.vv v12, v12, v16 +; RV64-NEXT: vsrl.vi v16, v0, 8 +; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vxor.vv v12, v12, v4 +; RV64-NEXT: vxor.vv v12, v12, v20 +; RV64-NEXT: vsrl.vi v12, v12, 24 +; RV64-NEXT: vor.vv v12, v16, v12 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v12, v12, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v12, v12, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 +; RV64-NEXT: ret + %a = call @llvm.clmulr.nxv8i32( %x, %x) + ret %a +} + +define @clmulr_nxv16i32( %x, %y) nounwind { +; RV32-LABEL: clmulr_nxv16i32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -80 +; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; 
RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: lui a5, 16 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsll.vi v0, v8, 24 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: lui ra, 349525 +; RV32-NEXT: li t5, 16 +; RV32-NEXT: li t2, 32 +; RV32-NEXT: li a7, 64 +; RV32-NEXT: li t0, 512 +; RV32-NEXT: li t1, 1024 +; RV32-NEXT: li a0, 1 +; RV32-NEXT: lui t3, 1 +; RV32-NEXT: lui t4, 2 +; RV32-NEXT: lui t6, 4 +; RV32-NEXT: lui s0, 8 +; RV32-NEXT: lui s1, 32 +; RV32-NEXT: lui s2, 64 +; RV32-NEXT: lui s3, 128 +; RV32-NEXT: lui s4, 256 +; RV32-NEXT: lui s5, 512 +; RV32-NEXT: lui s6, 1024 +; RV32-NEXT: lui s7, 2048 +; RV32-NEXT: lui s8, 4096 +; RV32-NEXT: lui s9, 8192 +; RV32-NEXT: lui s10, 16384 +; RV32-NEXT: lui s11, 32768 +; RV32-NEXT: addi a4, a5, -256 +; RV32-NEXT: addi a3, a1, -241 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: addi a1, ra, 1365 +; RV32-NEXT: slli a0, a0, 11 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v0, v8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v16, v16, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vand.vi v16, v8, 2 +; RV32-NEXT: vand.vi v24, v8, 1 +; RV32-NEXT: vand.vi v0, v8, 4 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vand.vi v16, v8, 8 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui ra, 65536 +; RV32-NEXT: vand.vx v16, v8, t5 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui t5, 131072 +; RV32-NEXT: vand.vx v16, v8, t2 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a6, a0 +; 
RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui t2, 262144 +; RV32-NEXT: vand.vx v16, v8, a7 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui a7, 524288 +; RV32-NEXT: li a6, 128 +; RV32-NEXT: vand.vx v16, v8, a6 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: li a6, 256 +; RV32-NEXT: vand.vx v16, v8, a6 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv a0, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: mv t0, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add t0, t0, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, t0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t1 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv t0, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add t0, t0, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, t0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t3 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a6, a6, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t4 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t6 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a6, a0 +; RV32-NEXT: 
slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, a5 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s1 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s2 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s3 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s4 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s5 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s6 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s7 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s8 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv 
a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s9 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s10 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s11 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, ra +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t5 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t2 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, a7 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v24, v8 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: 
vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v16, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: 
addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vand.vx v24, v8, a4 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v24, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; 
RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v24, v24, v0 +; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vsrl.vi v24, v24, 24 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v16, v16, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 80 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_nxv16i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, 
a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; RV64-NEXT: vsrl.vi v16, v8, 8 +; RV64-NEXT: lui a5, 16 +; RV64-NEXT: vsrl.vi v24, v8, 24 +; RV64-NEXT: vsll.vi v0, v8, 24 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui ra, 349525 +; RV64-NEXT: li t5, 16 +; RV64-NEXT: li t2, 32 +; RV64-NEXT: li a7, 64 +; RV64-NEXT: li t0, 512 +; RV64-NEXT: li t1, 1024 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: lui t3, 1 +; RV64-NEXT: lui t4, 2 +; RV64-NEXT: lui t6, 4 +; RV64-NEXT: lui s0, 8 +; RV64-NEXT: lui s1, 32 +; RV64-NEXT: lui s2, 64 +; RV64-NEXT: lui s3, 128 +; RV64-NEXT: lui s4, 256 +; RV64-NEXT: lui s5, 512 +; RV64-NEXT: lui s6, 1024 +; RV64-NEXT: lui s7, 2048 +; RV64-NEXT: lui s8, 4096 +; RV64-NEXT: lui s9, 8192 +; RV64-NEXT: lui s10, 16384 +; RV64-NEXT: lui s11, 32768 +; RV64-NEXT: addi a4, a5, -256 +; RV64-NEXT: addi a3, a1, -241 +; RV64-NEXT: addi a2, a2, 819 +; RV64-NEXT: addi a1, ra, 1365 +; RV64-NEXT: slli a0, a0, 11 +; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vor.vv v16, v16, v24 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v0, v8 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v16, v16, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v16, v16, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vand.vi v16, v8, 2 +; RV64-NEXT: vand.vi v24, v8, 1 +; RV64-NEXT: vand.vi v0, v8, 4 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v24 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vand.vi v16, v8, 8 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui ra, 65536 +; RV64-NEXT: vand.vx v16, v8, t5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui t5, 131072 +; RV64-NEXT: vand.vx v16, v8, t2 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v 
v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui t2, 262144 +; RV64-NEXT: vand.vx v16, v8, a7 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui a7, 524288 +; RV64-NEXT: li a6, 128 +; RV64-NEXT: vand.vx v16, v8, a6 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: li a6, 256 +; RV64-NEXT: vand.vx v16, v8, a6 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: mv a0, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t0 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 4 +; RV64-NEXT: mv t0, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add t0, t0, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add a6, a6, t0 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t1 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: mv t0, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add t0, t0, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add a6, a6, t0 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t3 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a6, a6, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t4 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t6 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a6, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, 
v8, s0 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 7 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, a5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s1 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s2 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s3 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s4 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s6 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s7 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s8 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; 
RV64-NEXT: vand.vx v16, v8, s9 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s10 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s11 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, ra +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t2 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, a7 +; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v24, v8 +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 
+; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 7 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv 
a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: vand.vx v24, v8, a4 +; RV64-NEXT: vsll.vi v24, v24, 8 +; RV64-NEXT: vor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v0, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: 
add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vsrl.vi v8, v8, 8 +; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vsrl.vi v24, v24, 24 +; RV64-NEXT: vor.vv v8, v8, v24 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v16, v16, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v16, v16, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v16, v16, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 +; RV64-NEXT: ret + %a = call @llvm.clmulr.nxv16i32( %x, %y) + ret %a +} + +define @clmulr_nxv1i64( %x, %y) nounwind { +; RV32-LABEL: clmulr_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui s7, 1044480 +; RV32-NEXT: lui a7, 524288 +; RV32-NEXT: li s11, 
1 +; RV32-NEXT: li s8, 2 +; RV32-NEXT: li s9, 4 +; RV32-NEXT: li s10, 8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: li a5, 64 +; RV32-NEXT: li a6, 128 +; RV32-NEXT: li ra, 256 +; RV32-NEXT: li a0, 512 +; RV32-NEXT: li a1, 1024 +; RV32-NEXT: lui a2, 1 +; RV32-NEXT: lui t0, 2 +; RV32-NEXT: lui t1, 4 +; RV32-NEXT: lui t2, 8 +; RV32-NEXT: lui t3, 16 +; RV32-NEXT: lui t4, 32 +; RV32-NEXT: lui t5, 64 +; RV32-NEXT: lui t6, 128 +; RV32-NEXT: lui s0, 256 +; RV32-NEXT: lui s1, 512 +; RV32-NEXT: lui s2, 1024 +; RV32-NEXT: lui s3, 2048 +; RV32-NEXT: lui s4, 4096 +; RV32-NEXT: lui s5, 8192 +; RV32-NEXT: lui s6, 16384 +; RV32-NEXT: sw s7, 272(sp) +; RV32-NEXT: lui s7, 32768 +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw a7, 264(sp) +; RV32-NEXT: sw zero, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw s11, 260(sp) +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw s8, 252(sp) +; RV32-NEXT: lui s8, 65536 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s9, 244(sp) +; RV32-NEXT: lui s9, 131072 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw s10, 236(sp) +; RV32-NEXT: lui s10, 262144 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw a3, 228(sp) +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw a4, 220(sp) +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw a5, 212(sp) +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw a6, 204(sp) +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw ra, 196(sp) +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw a0, 188(sp) +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a1, 180(sp) +; RV32-NEXT: slli s11, s11, 11 +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw s11, 172(sp) +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw a2, 164(sp) +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw t0, 156(sp) +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw t1, 148(sp) +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw t2, 140(sp) +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw t3, 132(sp) +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t4, 124(sp) +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t5, 116(sp) +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t6, 108(sp) +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw s0, 100(sp) +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw s1, 92(sp) +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw s2, 84(sp) +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw s3, 76(sp) +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw s4, 68(sp) +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw s5, 60(sp) +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw s6, 52(sp) +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw s7, 44(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw s8, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: sw s9, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw s10, 20(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw a7, 12(sp) +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v3, a0 +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vmv.v.x v2, a0 +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vmv.v.x v1, a0 +; RV32-NEXT: addi a0, sp, 272 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v0, (a0), zero +; RV32-NEXT: addi a0, sp, 264 +; RV32-NEXT: vlse64.v v13, (a0), zero +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vlse64.v v14, (a0), zero +; RV32-NEXT: addi a0, sp, 248 +; RV32-NEXT: vlse64.v v15, (a0), zero +; RV32-NEXT: addi a0, sp, 240 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: addi a0, sp, 232 +; RV32-NEXT: 
vlse64.v v17, (a0), zero +; RV32-NEXT: addi a0, sp, 224 +; RV32-NEXT: vlse64.v v18, (a0), zero +; RV32-NEXT: addi a0, sp, 216 +; RV32-NEXT: vlse64.v v19, (a0), zero +; RV32-NEXT: addi a0, sp, 208 +; RV32-NEXT: vlse64.v v20, (a0), zero +; RV32-NEXT: addi a0, sp, 200 +; RV32-NEXT: vlse64.v v21, (a0), zero +; RV32-NEXT: addi a0, sp, 192 +; RV32-NEXT: vlse64.v v22, (a0), zero +; RV32-NEXT: addi a0, sp, 184 +; RV32-NEXT: vlse64.v v23, (a0), zero +; RV32-NEXT: addi a0, sp, 176 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: addi a0, sp, 168 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: addi a0, sp, 160 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: addi a0, sp, 152 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: addi a0, sp, 144 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: addi a0, sp, 136 +; RV32-NEXT: vlse64.v v29, (a0), zero +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vlse64.v v30, (a0), zero +; RV32-NEXT: addi a0, sp, 120 +; RV32-NEXT: vlse64.v v31, (a0), zero +; RV32-NEXT: addi a0, sp, 112 +; RV32-NEXT: vlse64.v v11, (a0), zero +; RV32-NEXT: addi a0, sp, 104 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: addi a0, sp, 96 +; RV32-NEXT: vlse64.v v5, (a0), zero +; RV32-NEXT: addi a0, sp, 88 +; RV32-NEXT: vlse64.v v4, (a0), zero +; RV32-NEXT: li a6, 56 +; RV32-NEXT: vsrl.vi v27, v8, 24 +; RV32-NEXT: vsrl.vx v28, v8, a6 +; RV32-NEXT: li ra, 40 +; RV32-NEXT: vsrl.vx v7, v8, ra +; RV32-NEXT: vsll.vx v6, v8, a6 +; RV32-NEXT: addi a4, t3, -256 +; RV32-NEXT: vand.vx v7, v7, a4 +; RV32-NEXT: vor.vv v28, v7, v28 +; RV32-NEXT: vand.vx v7, v8, a4 +; RV32-NEXT: vsll.vx v7, v7, ra +; RV32-NEXT: vor.vv v7, v6, v7 +; RV32-NEXT: vsrl.vi v6, v8, 8 +; RV32-NEXT: lui a5, 4080 +; RV32-NEXT: vand.vx v27, v27, a5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v6, v6, v0 +; RV32-NEXT: vor.vv v27, v6, v27 +; RV32-NEXT: addi a3, sp, 80 +; RV32-NEXT: vlse64.v v6, (a3), zero +; RV32-NEXT: vor.vv v27, v27, v28 +; RV32-NEXT: vand.vx v28, v8, a5 +; RV32-NEXT: vsll.vi v28, v28, 24 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v28, v8 +; RV32-NEXT: addi a3, sp, 72 +; RV32-NEXT: vlse64.v v28, (a3), zero +; RV32-NEXT: vor.vv v8, v7, v8 +; RV32-NEXT: addi a3, sp, 64 +; RV32-NEXT: vlse64.v v7, (a3), zero +; RV32-NEXT: vor.vv v8, v8, v27 +; RV32-NEXT: vsrl.vi v27, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v3 +; RV32-NEXT: vand.vv v27, v27, v3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v27, v8 +; RV32-NEXT: vsrl.vi v27, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v2 +; RV32-NEXT: vand.vv v27, v27, v2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v27, v8 +; 
RV32-NEXT: vsrl.vi v27, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v1 +; RV32-NEXT: vand.vv v27, v27, v1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v27, v8 +; RV32-NEXT: addi a3, sp, 56 +; RV32-NEXT: vlse64.v v27, (a3), zero +; RV32-NEXT: vand.vv v13, v8, v13 +; RV32-NEXT: vand.vv v14, v8, v14 +; RV32-NEXT: vand.vv v15, v8, v15 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vv v17, v8, v17 +; RV32-NEXT: vand.vv v18, v8, v18 +; RV32-NEXT: vand.vv v19, v8, v19 +; RV32-NEXT: vand.vv v20, v8, v20 +; RV32-NEXT: vand.vv v21, v8, v21 +; RV32-NEXT: vand.vv v22, v8, v22 +; RV32-NEXT: vand.vv v23, v8, v23 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vand.vv v25, v8, v25 +; RV32-NEXT: vand.vv v26, v8, v26 +; RV32-NEXT: vand.vv v3, v8, v9 +; RV32-NEXT: vand.vv v2, v8, v10 +; RV32-NEXT: vand.vv v29, v8, v29 +; RV32-NEXT: vand.vv v30, v8, v30 +; RV32-NEXT: vand.vv v31, v8, v31 +; RV32-NEXT: vand.vv v0, v8, v11 +; RV32-NEXT: vand.vv v9, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v5, v8, v5 +; RV32-NEXT: vand.vv v4, v8, v4 +; RV32-NEXT: vand.vv v6, v8, v6 +; RV32-NEXT: vand.vv v9, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: addi a0, sp, 40 +; RV32-NEXT: vlse64.v v9, (a3), zero +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vand.vv v11, v8, v7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v11, v8, v27 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: addi a3, sp, 24 +; 
RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a2), zero +; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: vlse64.v v11, (a1), zero +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vand.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 8 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 16 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; 
RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 64 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 128 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 256 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 512 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 1024 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s11 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, 
t3 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t5 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t6 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s3 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s5 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s6 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s7 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; 
RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s8 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s9 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v1, v8, s10 +; RV32-NEXT: vmul.vv v1, v8, v1 +; RV32-NEXT: vmul.vv v9, v8, v13 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v14 +; RV32-NEXT: vmul.vv v11, v8, v15 +; RV32-NEXT: vmul.vv v12, v8, v16 +; RV32-NEXT: vmul.vv v13, v8, v17 +; RV32-NEXT: vmul.vv v14, v8, v18 +; RV32-NEXT: vmul.vv v15, v8, v19 +; RV32-NEXT: vmul.vv v16, v8, v20 +; RV32-NEXT: vmul.vv v17, v8, v21 +; RV32-NEXT: vmul.vv v18, v8, v22 +; RV32-NEXT: vmul.vv v19, v8, v23 +; RV32-NEXT: vmul.vv v20, v8, v24 +; RV32-NEXT: vmul.vv v21, v8, v25 +; RV32-NEXT: vmul.vv v22, v8, v26 +; RV32-NEXT: vmul.vv v23, v8, v3 +; RV32-NEXT: vmul.vv v24, v8, v2 +; RV32-NEXT: vmul.vv v25, v8, v29 +; RV32-NEXT: vmul.vv v26, v8, v30 +; RV32-NEXT: vmul.vv v27, v8, v31 +; RV32-NEXT: vmul.vv v28, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v29, v8, v29 +; RV32-NEXT: vmul.vv v30, v8, v5 +; RV32-NEXT: vmul.vv v31, v8, v4 +; RV32-NEXT: vmul.vv v7, v8, v6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v5, v8, v5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v3, v8, v3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: 
add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vi v8, v8, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; 
RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded 
Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vxor.vv v8, v8, v1 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vxor.vv v8, v8, v11 +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v8, v13 +; RV32-NEXT: vxor.vv v8, v8, v14 +; RV32-NEXT: vxor.vv v8, v8, v15 +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v8, v17 +; RV32-NEXT: vxor.vv v8, v8, v18 +; RV32-NEXT: vxor.vv v8, v8, v19 +; RV32-NEXT: vxor.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v8, v21 +; RV32-NEXT: vxor.vv v8, v8, v22 +; RV32-NEXT: vxor.vv v8, v8, v23 +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vxor.vv v8, v8, v25 +; RV32-NEXT: vxor.vv v8, v8, v26 +; RV32-NEXT: vxor.vv v8, v8, v27 +; RV32-NEXT: vxor.vv v8, v8, v28 +; RV32-NEXT: vxor.vv v8, v8, v29 +; RV32-NEXT: vxor.vv v8, v8, v30 +; RV32-NEXT: vxor.vv v8, v8, v31 +; RV32-NEXT: vxor.vv v8, v8, v7 +; RV32-NEXT: vxor.vv v8, v8, v6 +; RV32-NEXT: vxor.vv v8, v8, v5 +; RV32-NEXT: vxor.vv v8, v8, v4 +; RV32-NEXT: vxor.vv v8, v8, v3 +; RV32-NEXT: vxor.vv v8, v8, v2 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vx v9, v8, a6 +; RV32-NEXT: vsll.vx v10, v8, a6 +; RV32-NEXT: vsrl.vx v11, v8, ra +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vand.vx v11, v11, a4 +; RV32-NEXT: vsrl.vi v13, v8, 24 +; RV32-NEXT: vand.vx v14, v8, a5 +; RV32-NEXT: vand.vx v13, v13, a5 +; RV32-NEXT: vsll.vx v12, v12, ra +; RV32-NEXT: vsrl.vi v15, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v15, v15, v16 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vor.vv v11, v15, v13 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vsll.vi v13, v14, 24 +; RV32-NEXT: vor.vv v8, v13, v8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vi v9, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte 
Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -224 +; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: vsrl.vi v9, v8, 8 +; RV64-NEXT: li t2, 255 +; RV64-NEXT: lui t6, 61681 +; RV64-NEXT: lui s0, 209715 +; RV64-NEXT: lui s1, 349525 +; RV64-NEXT: li s10, 16 +; RV64-NEXT: li s9, 32 +; RV64-NEXT: li s8, 64 +; RV64-NEXT: li s5, 128 +; RV64-NEXT: li s6, 256 +; RV64-NEXT: li t5, 512 +; RV64-NEXT: li t3, 1024 +; RV64-NEXT: li t0, 1 +; RV64-NEXT: lui s7, 1 +; RV64-NEXT: lui a1, 2 +; 
RV64-NEXT: lui t4, 4 +; RV64-NEXT: lui t1, 8 +; RV64-NEXT: lui a7, 32 +; RV64-NEXT: lui a6, 64 +; RV64-NEXT: lui a5, 128 +; RV64-NEXT: lui a4, 256 +; RV64-NEXT: lui a3, 512 +; RV64-NEXT: lui a2, 1024 +; RV64-NEXT: li s11, 56 +; RV64-NEXT: vsrl.vx v11, v8, s11 +; RV64-NEXT: li ra, 40 +; RV64-NEXT: vsrl.vx v12, v8, ra +; RV64-NEXT: addi t6, t6, -241 +; RV64-NEXT: addi s2, s0, 819 +; RV64-NEXT: addi s3, s1, 1365 +; RV64-NEXT: slli s1, t6, 32 +; RV64-NEXT: add s4, t6, s1 +; RV64-NEXT: slli t6, s2, 32 +; RV64-NEXT: add s2, s2, t6 +; RV64-NEXT: slli t6, s3, 32 +; RV64-NEXT: add s3, s3, t6 +; RV64-NEXT: lui s0, 16 +; RV64-NEXT: addi s1, s0, -256 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v10, v10, a0 +; RV64-NEXT: slli t6, t2, 24 +; RV64-NEXT: vand.vx v13, v8, a0 +; RV64-NEXT: vsll.vx v14, v8, s11 +; RV64-NEXT: vand.vx v12, v12, s1 +; RV64-NEXT: vand.vx v9, v9, t6 +; RV64-NEXT: vsll.vi v13, v13, 24 +; RV64-NEXT: vand.vx v15, v8, t6 +; RV64-NEXT: vand.vx v8, v8, s1 +; RV64-NEXT: vor.vv v11, v12, v11 +; RV64-NEXT: vor.vv v9, v9, v10 +; RV64-NEXT: vsll.vi v10, v15, 8 +; RV64-NEXT: vsll.vx v8, v8, ra +; RV64-NEXT: vor.vv v9, v9, v11 +; RV64-NEXT: vor.vv v10, v13, v10 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: vand.vx v8, v8, s4 +; RV64-NEXT: vand.vx v9, v9, s4 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: vand.vx v8, v8, s2 +; RV64-NEXT: vand.vx v9, v9, s2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vand.vx v8, v8, s3 +; RV64-NEXT: vand.vx v9, v9, s3 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vand.vx v7, v8, s10 +; RV64-NEXT: lui t2, 4096 +; RV64-NEXT: vand.vx v6, v8, s9 +; RV64-NEXT: lui s9, 8192 +; RV64-NEXT: vand.vx v5, v8, s8 +; RV64-NEXT: lui s8, 16384 +; RV64-NEXT: vand.vx v4, v8, s5 +; RV64-NEXT: lui s10, 32768 +; RV64-NEXT: vand.vx v13, v8, s6 +; RV64-NEXT: lui s11, 65536 +; RV64-NEXT: vand.vx v14, v8, t5 +; RV64-NEXT: lui t5, 131072 +; RV64-NEXT: vand.vx v15, v8, t3 +; RV64-NEXT: slli t3, t0, 11 +; RV64-NEXT: vand.vx v16, v8, t3 +; RV64-NEXT: lui t3, 262144 +; RV64-NEXT: vand.vx v17, v8, s7 +; RV64-NEXT: slli a0, t0, 31 +; RV64-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v18, v8, a1 +; RV64-NEXT: slli a0, t0, 32 +; RV64-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v19, v8, t4 +; RV64-NEXT: slli a0, t0, 33 +; RV64-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v20, v8, t1 +; RV64-NEXT: slli a0, t0, 34 +; RV64-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v21, v8, s0 +; RV64-NEXT: slli a0, t0, 35 +; RV64-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v22, v8, a7 +; RV64-NEXT: slli a0, t0, 36 +; RV64-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v23, v8, a6 +; RV64-NEXT: slli a0, t0, 37 +; RV64-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v24, v8, a5 +; RV64-NEXT: slli a0, t0, 38 +; RV64-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v25, v8, a4 +; RV64-NEXT: slli a0, t0, 39 +; RV64-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v26, v8, a3 +; RV64-NEXT: slli a0, t0, 40 +; RV64-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v27, v8, a2 +; RV64-NEXT: slli a0, t0, 41 +; RV64-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a0, 2048 +; 
RV64-NEXT: vand.vx v28, v8, a0 +; RV64-NEXT: slli s5, t0, 42 +; RV64-NEXT: vand.vx v29, v8, t2 +; RV64-NEXT: slli s6, t0, 43 +; RV64-NEXT: vand.vx v30, v8, s9 +; RV64-NEXT: slli s7, t0, 44 +; RV64-NEXT: vand.vx v10, v8, s8 +; RV64-NEXT: slli s8, t0, 45 +; RV64-NEXT: vand.vx v11, v8, s10 +; RV64-NEXT: slli s9, t0, 46 +; RV64-NEXT: vand.vx v12, v8, s11 +; RV64-NEXT: slli s10, t0, 47 +; RV64-NEXT: vand.vx v9, v8, t5 +; RV64-NEXT: slli s11, t0, 48 +; RV64-NEXT: vand.vx v31, v8, t3 +; RV64-NEXT: slli ra, t0, 49 +; RV64-NEXT: slli t5, t0, 50 +; RV64-NEXT: slli t4, t0, 51 +; RV64-NEXT: slli t3, t0, 52 +; RV64-NEXT: slli t2, t0, 53 +; RV64-NEXT: slli t1, t0, 54 +; RV64-NEXT: slli a7, t0, 55 +; RV64-NEXT: slli a6, t0, 56 +; RV64-NEXT: slli a5, t0, 57 +; RV64-NEXT: slli a4, t0, 58 +; RV64-NEXT: slli a3, t0, 59 +; RV64-NEXT: slli a2, t0, 60 +; RV64-NEXT: slli a1, t0, 61 +; RV64-NEXT: slli t0, t0, 62 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vand.vi v3, v8, 2 +; RV64-NEXT: vand.vi v2, v8, 1 +; RV64-NEXT: vand.vi v1, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v3, v8, v3 +; RV64-NEXT: sd t6, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v3, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v3, v8, v2 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v3, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v3, v8, v1 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v3, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vmul.vv v7, v8, v7 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v7, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v7, v8, v6 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v7, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v7, v8, v5 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v7, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v7, v8, v4 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v7, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v13 +; 
RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v14 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v15 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s0, t6, 4 +; RV64-NEXT: add t6, s0, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v16 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v17 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s0, t6, 4 +; RV64-NEXT: sub t6, s0, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v18 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v19 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v20 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v21 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v22 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v23 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s0, t6, 3 +; RV64-NEXT: add t6, s0, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v24 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v25 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s0, t6, 3 +; RV64-NEXT: sub t6, s0, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v 
v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v26 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v27 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s0, t6, 2 +; RV64-NEXT: add t6, s0, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v28 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v29 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s0, t6, 1 +; RV64-NEXT: add t6, s0, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v13, v8, v30 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 4 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v10, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v11 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s0, t6, 5 +; RV64-NEXT: add t6, s0, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v10, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v12 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 5 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v10, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s0, t6, 5 +; RV64-NEXT: sub t6, s0, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v31 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s0, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s0, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte 
Folded Spill +; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr s0, vlenb +; RV64-NEXT: slli s0, s0, 1 +; RV64-NEXT: mv t6, s0 +; RV64-NEXT: slli s0, s0, 2 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: slli s0, s0, 1 +; RV64-NEXT: add s0, s0, t6 +; RV64-NEXT: ld t6, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s0, sp, s0 +; RV64-NEXT: addi s0, s0, 112 +; RV64-NEXT: vs1r.v v9, (s0) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s0, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr s0, vlenb +; RV64-NEXT: add s0, sp, s0 +; RV64-NEXT: addi s0, s0, 112 +; RV64-NEXT: vs1r.v v9, (s0) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: addi s0, sp, 112 +; RV64-NEXT: vs1r.v v9, (s0) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s0, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v4, v8, v9 +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v5, v8, v9 +; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v6, v8, v9 +; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v7, v8, v9 +; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v31, v8, v9 +; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s0 +; RV64-NEXT: vmul.vv v30, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s5 +; RV64-NEXT: vmul.vv v29, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s6 +; RV64-NEXT: vmul.vv v28, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s7 +; RV64-NEXT: vmul.vv v27, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s8 +; RV64-NEXT: vmul.vv v26, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s9 +; RV64-NEXT: vmul.vv v25, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s10 +; RV64-NEXT: vmul.vv v23, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s11 +; RV64-NEXT: vmul.vv v19, v8, v9 +; RV64-NEXT: vand.vx v9, v8, ra +; RV64-NEXT: vmul.vv v14, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t5 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: vand.vx v10, v8, t4 +; RV64-NEXT: vmul.vv v24, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t3 +; RV64-NEXT: vmul.vv v22, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t2 +; RV64-NEXT: vmul.vv v20, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t1 +; RV64-NEXT: vmul.vv v15, v8, v10 +; RV64-NEXT: vand.vx v10, v8, a7 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: vand.vx v11, v8, a6 +; RV64-NEXT: vmul.vv v16, v8, v11 +; RV64-NEXT: vand.vx v11, v8, a5 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vand.vx v12, v8, a4 +; RV64-NEXT: vmul.vv v21, v8, v12 +; RV64-NEXT: vand.vx v12, v8, a3 +; RV64-NEXT: vmul.vv v17, v8, v12 +; RV64-NEXT: vand.vx v12, v8, a2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: vand.vx v13, v8, a1 +; RV64-NEXT: vmul.vv v18, v8, v13 +; RV64-NEXT: vand.vx v13, v8, t0 +; RV64-NEXT: vmul.vv v13, v8, v13 +; RV64-NEXT: vand.vx v2, v8, a0 +; RV64-NEXT: vmul.vv v8, v8, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb 
+; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v1 +; RV64-NEXT: vxor.vv v2, v2, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v2, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; 
RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v1, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v3 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsll.vx v2, v2, a0 +; RV64-NEXT: vand.vx v1, v1, s1 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsll.vx v1, v1, a1 +; RV64-NEXT: vor.vv v2, v2, v1 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: add a2, a2, a3 +; 
RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v1, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v0, v1 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 5 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 5 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl1r.v v3, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v3 +; RV64-NEXT: addi a2, sp, 112 +; RV64-NEXT: vl1r.v v3, (a2) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v3, v1, v3 +; RV64-NEXT: vxor.vv v4, v3, v4 +; RV64-NEXT: vxor.vv v5, v4, v5 +; RV64-NEXT: vxor.vv v6, v5, v6 +; RV64-NEXT: vxor.vv v7, v6, v7 +; RV64-NEXT: vxor.vv v31, v7, v31 +; RV64-NEXT: vxor.vv v30, v31, v30 +; RV64-NEXT: vxor.vv v29, v30, v29 +; RV64-NEXT: vxor.vv v28, v29, v28 +; RV64-NEXT: vxor.vv v27, v28, v27 +; RV64-NEXT: vxor.vv v26, v27, v26 +; RV64-NEXT: vxor.vv v25, v26, v25 +; RV64-NEXT: vxor.vv v23, v25, v23 +; RV64-NEXT: vxor.vv v19, v23, v19 +; RV64-NEXT: vxor.vv v14, v19, v14 +; RV64-NEXT: vxor.vv v9, v14, v9 +; RV64-NEXT: vsrl.vi v14, v7, 8 +; RV64-NEXT: vand.vx v14, v14, t6 +; RV64-NEXT: vsrl.vi v19, v23, 24 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v19, v19, a2 +; RV64-NEXT: vor.vv v14, v14, v19 +; RV64-NEXT: vxor.vv v9, v9, v24 +; RV64-NEXT: vxor.vv v9, v9, v22 +; RV64-NEXT: vxor.vv v9, v9, v20 +; RV64-NEXT: vxor.vv v9, v9, v15 +; RV64-NEXT: vxor.vv v9, v9, v10 
+; RV64-NEXT: vand.vx v10, v7, a2 +; RV64-NEXT: vsll.vi v10, v10, 24 +; RV64-NEXT: vxor.vv v15, v9, v16 +; RV64-NEXT: vxor.vv v11, v15, v11 +; RV64-NEXT: vand.vx v15, v9, t6 +; RV64-NEXT: vsll.vi v15, v15, 8 +; RV64-NEXT: vor.vv v10, v10, v15 +; RV64-NEXT: vxor.vv v11, v11, v21 +; RV64-NEXT: vor.vv v10, v2, v10 +; RV64-NEXT: vxor.vv v11, v11, v17 +; RV64-NEXT: vxor.vv v11, v11, v12 +; RV64-NEXT: vsrl.vx v9, v9, a1 +; RV64-NEXT: vand.vx v9, v9, s1 +; RV64-NEXT: vxor.vv v11, v11, v18 +; RV64-NEXT: vxor.vv v11, v11, v13 +; RV64-NEXT: vxor.vv v8, v11, v8 +; RV64-NEXT: vsrl.vx v8, v8, a0 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: vand.vx v8, v8, s4 +; RV64-NEXT: vand.vx v9, v9, s4 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: vand.vx v8, v8, s2 +; RV64-NEXT: vand.vx v9, v9, s2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vand.vx v8, v8, s3 +; RV64-NEXT: vand.vx v9, v9, s3 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 224 +; RV64-NEXT: ret + %a = call <vscale x 1 x i64> @llvm.clmulr.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y) + ret <vscale x 1 x i64> %a +} + +define <vscale x 2 x i64> @clmulr_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) nounwind { +; RV32-LABEL: clmulr_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui s7, 1044480 +; RV32-NEXT: lui a7, 524288 +; RV32-NEXT: li a1, 1 +; RV32-NEXT: li s8, 2 +; RV32-NEXT: li s9, 4 +; RV32-NEXT: li s10, 8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a4, 32 +;
RV32-NEXT: li a5, 64 +; RV32-NEXT: li a6, 128 +; RV32-NEXT: li s11, 256 +; RV32-NEXT: li ra, 512 +; RV32-NEXT: li a0, 1024 +; RV32-NEXT: lui a2, 1 +; RV32-NEXT: lui t0, 2 +; RV32-NEXT: lui t1, 4 +; RV32-NEXT: lui t2, 8 +; RV32-NEXT: lui t3, 16 +; RV32-NEXT: lui t4, 32 +; RV32-NEXT: lui t5, 64 +; RV32-NEXT: lui t6, 128 +; RV32-NEXT: lui s0, 256 +; RV32-NEXT: lui s1, 512 +; RV32-NEXT: lui s2, 1024 +; RV32-NEXT: lui s3, 2048 +; RV32-NEXT: lui s4, 4096 +; RV32-NEXT: lui s5, 8192 +; RV32-NEXT: lui s6, 16384 +; RV32-NEXT: sw s7, 272(sp) +; RV32-NEXT: lui s7, 32768 +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw a7, 264(sp) +; RV32-NEXT: sw zero, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a1, 260(sp) +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw s8, 252(sp) +; RV32-NEXT: lui s8, 65536 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s9, 244(sp) +; RV32-NEXT: lui s9, 131072 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw s10, 236(sp) +; RV32-NEXT: lui s10, 262144 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw a3, 228(sp) +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw a4, 220(sp) +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw a5, 212(sp) +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw a6, 204(sp) +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s11, 196(sp) +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw ra, 188(sp) +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a0, 180(sp) +; RV32-NEXT: slli a5, a1, 11 +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw a5, 172(sp) +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw a2, 164(sp) +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw t0, 156(sp) +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw t1, 148(sp) +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw t2, 140(sp) +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw t3, 132(sp) +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t4, 124(sp) +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t5, 116(sp) +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t6, 108(sp) +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw s0, 100(sp) +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw s1, 92(sp) +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw s2, 84(sp) +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw s3, 76(sp) +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw s4, 68(sp) +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw s5, 60(sp) +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw s6, 52(sp) +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw s7, 44(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw s8, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: sw s9, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw s10, 20(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw a7, 12(sp) +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v4, a0 +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vmv.v.x v2, a0 +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: addi a0, sp, 272 +; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v6, (a0), zero +; RV32-NEXT: addi a0, sp, 264 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill 
+; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: addi a0, sp, 248 +; RV32-NEXT: vlse64.v v14, (a0), zero +; RV32-NEXT: addi a0, sp, 240 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: addi a0, sp, 232 +; RV32-NEXT: vlse64.v v18, (a0), zero +; RV32-NEXT: addi a0, sp, 224 +; RV32-NEXT: vlse64.v v20, (a0), zero +; RV32-NEXT: addi a0, sp, 216 +; RV32-NEXT: vlse64.v v22, (a0), zero +; RV32-NEXT: li ra, 56 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsrl.vx v26, v8, ra +; RV32-NEXT: li s11, 40 +; RV32-NEXT: vsrl.vx v28, v8, s11 +; RV32-NEXT: vsll.vx v30, v8, ra +; RV32-NEXT: addi a4, t3, -256 +; RV32-NEXT: vand.vx v28, v28, a4 +; RV32-NEXT: vor.vv v26, v28, v26 +; RV32-NEXT: vand.vx v28, v8, a4 +; RV32-NEXT: vsll.vx v28, v28, s11 +; RV32-NEXT: vor.vv v30, v30, v28 +; RV32-NEXT: vsrl.vi v28, v8, 8 +; RV32-NEXT: lui a6, 4080 +; RV32-NEXT: vand.vx v24, v24, a6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v28, v28, v6 +; RV32-NEXT: vor.vv v28, v28, v24 +; RV32-NEXT: addi a3, sp, 208 +; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: vor.vv v10, v28, v26 +; RV32-NEXT: vand.vx v26, v8, a6 +; RV32-NEXT: vsll.vi v26, v26, 24 +; RV32-NEXT: vand.vv v8, v8, v6 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v26, v8 +; RV32-NEXT: addi a3, sp, 200 +; RV32-NEXT: vlse64.v v28, (a3), zero +; RV32-NEXT: vor.vv v8, v30, v8 +; RV32-NEXT: addi a3, sp, 192 +; RV32-NEXT: vlse64.v v26, (a3), zero +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v30, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v4 +; RV32-NEXT: vand.vv v30, v30, v4 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v30, v8 +; RV32-NEXT: vsrl.vi v30, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v2, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v2 +; RV32-NEXT: vand.vv v30, v30, v2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v30, v8 +; RV32-NEXT: vsrl.vi v30, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v0, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: 
vand.vv v30, v30, v0 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v30, v8 +; RV32-NEXT: addi a3, sp, 184 +; RV32-NEXT: vlse64.v v30, (a3), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v6, v8, v10 +; RV32-NEXT: vand.vv v4, v8, v12 +; RV32-NEXT: vand.vv v2, v8, v14 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vand.vv v10, v8, v18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v28, v8, v28 +; RV32-NEXT: addi a3, sp, 176 +; RV32-NEXT: addi a0, sp, 168 +; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vand.vv v14, v8, v26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v14, v8, v30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add 
a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a2, sp, 160 +; RV32-NEXT: addi a3, sp, 152 +; RV32-NEXT: addi a1, sp, 144 +; RV32-NEXT: addi a0, sp, 136 +; RV32-NEXT: vlse64.v v10, (a2), zero +; RV32-NEXT: vlse64.v v12, (a3), zero +; RV32-NEXT: vlse64.v v14, (a1), zero +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: addi a1, sp, 120 +; RV32-NEXT: addi a2, sp, 112 +; RV32-NEXT: addi a3, sp, 104 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v14, (a2), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli 
a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 96 +; RV32-NEXT: addi a1, sp, 88 +; RV32-NEXT: addi a2, sp, 80 +; RV32-NEXT: addi a3, sp, 72 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v14, (a2), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: addi a1, sp, 56 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v14, (a2), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 
288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v14, (a2), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vi v10, v8, 2 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vi v10, v8, 1 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vi v10, v8, 4 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vi v10, v8, 8 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, 
a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 16 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 64 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 128 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 256 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 512 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 1024 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; 
RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t1 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t2 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t3 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t4 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t6 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s1 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s2 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 
+; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s3 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s4 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s6 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s7 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s8 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s9 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s10 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v12, v8, v6 +; RV32-NEXT: vmul.vv v14, v8, v4 +; RV32-NEXT: vmul.vv v16, v8, v2 +; RV32-NEXT: vmul.vv v18, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v20, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v22, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v22, v8, v22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, 
a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v24, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v26, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v30, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 
+; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, 
sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded 
Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vi v8, v8, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; 
RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: 
csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, 
vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v8, v14 +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v8, v18 +; RV32-NEXT: vxor.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v8, v22 +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vxor.vv v8, v8, v26 +; RV32-NEXT: vxor.vv v8, v8, v28 +; RV32-NEXT: vxor.vv v8, v8, v30 +; RV32-NEXT: vxor.vv v8, v8, v6 +; RV32-NEXT: vxor.vv v8, v8, v4 +; RV32-NEXT: vxor.vv v8, v8, v2 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded 
Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr 
a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vx v10, v8, ra +; RV32-NEXT: vsll.vx v12, v8, ra +; RV32-NEXT: vsrl.vx v14, v8, s11 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vand.vx v14, v14, a4 +; RV32-NEXT: vsrl.vi v18, v8, 24 +; RV32-NEXT: vand.vx v20, v8, a6 +; RV32-NEXT: vand.vx v18, v18, a6 +; RV32-NEXT: vsll.vx v16, v16, s11 +; RV32-NEXT: vsrl.vi v22, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v24, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v22, v22, v24 +; RV32-NEXT: vor.vv v10, v14, v10 +; RV32-NEXT: vor.vv v14, v22, v18 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vsll.vi v18, v20, 24 +; RV32-NEXT: vor.vv v8, v18, v8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v14, v10 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, 
v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -224 +; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: li s3, 40 +; RV64-NEXT: lui s1, 16 +; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64-NEXT: vsrl.vi v14, v8, 24 +; RV64-NEXT: vsrl.vi v10, v8, 8 +; RV64-NEXT: li t4, 255 +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: lui a6, 209715 +; RV64-NEXT: lui 
t6, 349525 +; RV64-NEXT: li t5, 16 +; RV64-NEXT: li t3, 32 +; RV64-NEXT: li t2, 64 +; RV64-NEXT: li t0, 128 +; RV64-NEXT: li t1, 256 +; RV64-NEXT: li a4, 512 +; RV64-NEXT: li a3, 1024 +; RV64-NEXT: li s0, 1 +; RV64-NEXT: lui a2, 1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: lui a0, 4 +; RV64-NEXT: li a7, 56 +; RV64-NEXT: vsrl.vx v12, v8, a7 +; RV64-NEXT: vsrl.vx v18, v8, s3 +; RV64-NEXT: addi s2, s1, -256 +; RV64-NEXT: lui s1, 4080 +; RV64-NEXT: vand.vx v16, v14, s1 +; RV64-NEXT: slli t4, t4, 24 +; RV64-NEXT: vand.vx v20, v8, s1 +; RV64-NEXT: vsll.vx v14, v8, a7 +; RV64-NEXT: addi a7, a5, -241 +; RV64-NEXT: addi a6, a6, 819 +; RV64-NEXT: addi a5, t6, 1365 +; RV64-NEXT: slli t6, s0, 11 +; RV64-NEXT: slli s1, s0, 31 +; RV64-NEXT: sd s1, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s1, s0, 32 +; RV64-NEXT: sd s1, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s1, s0, 33 +; RV64-NEXT: sd s1, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s1, s0, 34 +; RV64-NEXT: sd s1, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s1, s0, 35 +; RV64-NEXT: sd s1, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s1, s0, 36 +; RV64-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s1, a7, 32 +; RV64-NEXT: add a7, a7, s1 +; RV64-NEXT: slli s1, a6, 32 +; RV64-NEXT: add a6, a6, s1 +; RV64-NEXT: slli s1, a5, 32 +; RV64-NEXT: add a5, a5, s1 +; RV64-NEXT: slli s1, s0, 37 +; RV64-NEXT: sd s1, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v18, v18, s2 +; RV64-NEXT: vand.vx v10, v10, t4 +; RV64-NEXT: vsll.vi v20, v20, 24 +; RV64-NEXT: vand.vx v22, v8, t4 +; RV64-NEXT: vand.vx v8, v8, s2 +; RV64-NEXT: vor.vv v12, v18, v12 +; RV64-NEXT: vor.vv v10, v10, v16 +; RV64-NEXT: vsll.vi v16, v22, 8 +; RV64-NEXT: vsll.vx v8, v8, s3 +; RV64-NEXT: vor.vv v10, v10, v12 +; RV64-NEXT: vor.vv v12, v20, v16 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v10, v10, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v10, v10, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v10, v10, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vand.vx v10, v8, t5 +; RV64-NEXT: slli t5, s0, 38 +; RV64-NEXT: sd t5, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t3 +; RV64-NEXT: slli t3, s0, 39 +; RV64-NEXT: sd t3, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v14, v8, t2 +; RV64-NEXT: slli t2, s0, 40 +; RV64-NEXT: sd t2, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v24, v8, t0 +; RV64-NEXT: slli t0, s0, 41 +; RV64-NEXT: sd t0, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t1 +; RV64-NEXT: slli s6, s0, 42 +; RV64-NEXT: vand.vx v18, v8, a4 +; RV64-NEXT: slli s7, s0, 43 +; RV64-NEXT: vand.vx v20, v8, a3 +; RV64-NEXT: slli s8, s0, 44 +; RV64-NEXT: vand.vx v22, v8, t6 +; RV64-NEXT: slli s9, s0, 45 +; RV64-NEXT: vand.vx v26, v8, a2 +; RV64-NEXT: slli s10, s0, 46 +; RV64-NEXT: vand.vx v28, v8, a1 +; RV64-NEXT: slli s11, s0, 47 +; RV64-NEXT: vand.vx v30, v8, a0 +; RV64-NEXT: slli ra, s0, 48 +; RV64-NEXT: slli s4, s0, 49 +; RV64-NEXT: slli s3, s0, 50 +; RV64-NEXT: slli s1, s0, 51 +; RV64-NEXT: slli t6, s0, 52 +; RV64-NEXT: slli t5, s0, 53 +; RV64-NEXT: slli t3, s0, 54 +; RV64-NEXT: slli t2, s0, 55 +; RV64-NEXT: slli t1, s0, 56 +; 
RV64-NEXT: slli t0, s0, 57 +; RV64-NEXT: slli a4, s0, 58 +; RV64-NEXT: slli a3, s0, 59 +; RV64-NEXT: slli a2, s0, 60 +; RV64-NEXT: slli a1, s0, 61 +; RV64-NEXT: slli s0, s0, 62 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vand.vi v6, v8, 2 +; RV64-NEXT: vand.vi v4, v8, 1 +; RV64-NEXT: vand.vi v2, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v6, v8, v6 +; RV64-NEXT: sd a5, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v4 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v2 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v14 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v24 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v18 +; 
RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v20 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v22 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v26 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v28 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v30 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 8 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 16 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 32 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 64 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # 
vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 128 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 256 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 512 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 1024 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 2048 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 8192 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 16384 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 32768 +; 
RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 65536 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 131072 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 262144 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 
56(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv a5, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: ld a5, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s6 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s7 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s8 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, 
(s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s9 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s10 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s11 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, ra +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s4 +; RV64-NEXT: vmul.vv v20, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s3 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: vand.vx v12, v8, s1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s1, vlenb +; RV64-NEXT: slli s1, s1, 3 +; RV64-NEXT: add s1, sp, s1 +; RV64-NEXT: addi s1, s1, 112 +; RV64-NEXT: vs2r.v v12, (s1) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs2r.v v12, (t6) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t5 +; RV64-NEXT: vmul.vv v6, v8, v12 +; RV64-NEXT: vand.vx v12, v8, t3 +; RV64-NEXT: vmul.vv v22, v8, v12 +; RV64-NEXT: vand.vx v12, v8, t2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: vand.vx v14, v8, t1 +; RV64-NEXT: vmul.vv v24, v8, v14 +; RV64-NEXT: vand.vx v14, v8, t0 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vand.vx v16, v8, a4 +; RV64-NEXT: vmul.vv v4, v8, v16 +; RV64-NEXT: vand.vx v16, v8, a3 +; RV64-NEXT: vmul.vv v2, v8, v16 +; RV64-NEXT: vand.vx v16, v8, a2 +; RV64-NEXT: vmul.vv v26, v8, v16 +; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: addi a1, sp, 112 +; RV64-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s0 +; RV64-NEXT: vmul.vv v18, v8, v16 +; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v28, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, 
a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v28 +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v8, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, 
a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; 
RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v0, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v30, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vand.vx v0, v0, s2 +; RV64-NEXT: li a0, 40 +; RV64-NEXT: vsll.vx v0, v0, a0 +; RV64-NEXT: vor.vv v8, v8, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v0, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; 
RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v28, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v28, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, 
a2, 4 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v30 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 112 +; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v30, v0, v30 +; RV64-NEXT: vxor.vv v20, v30, v20 +; RV64-NEXT: vxor.vv v10, v20, v10 +; RV64-NEXT: vsrl.vi v20, v28, 8 +; RV64-NEXT: vand.vx v20, v20, t4 +; RV64-NEXT: vsrl.vi v30, v0, 24 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v30, v30, a2 +; RV64-NEXT: vor.vv v20, v20, v30 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 112 +; RV64-NEXT: vl2r.v v30, (a3) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v10, v10, v30 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 112 +; RV64-NEXT: vl2r.v v30, (a3) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v10, v10, v30 +; RV64-NEXT: vxor.vv v10, v10, v6 +; RV64-NEXT: vxor.vv v10, v10, v22 +; RV64-NEXT: vxor.vv v10, v10, v12 +; RV64-NEXT: vand.vx v12, v28, a2 +; RV64-NEXT: vsll.vi v12, v12, 24 +; RV64-NEXT: vxor.vv v22, v10, v24 +; RV64-NEXT: vxor.vv v14, v22, v14 +; RV64-NEXT: vand.vx v22, v10, t4 +; RV64-NEXT: vsll.vi v22, v22, 8 +; RV64-NEXT: vor.vv v12, v12, v22 +; RV64-NEXT: vxor.vv v14, v14, v4 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vxor.vv v12, v14, v2 +; RV64-NEXT: vxor.vv v12, v12, v26 +; RV64-NEXT: vsrl.vx v10, v10, a0 +; RV64-NEXT: vand.vx v10, v10, s2 +; RV64-NEXT: addi a0, sp, 112 +; RV64-NEXT: vl2r.v v14, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vxor.vv v12, v12, v18 +; RV64-NEXT: vxor.vv v12, v12, v16 +; RV64-NEXT: vsrl.vx v12, v12, a1 +; RV64-NEXT: vor.vv v10, v10, v12 +; RV64-NEXT: vor.vv v10, v20, v10 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v10, v10, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v10, v10, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: 
vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v10, v10, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 224 +; RV64-NEXT: ret + %a = call <vscale x 2 x i64> @llvm.clmulr.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) + ret <vscale x 2 x i64> %a +} + +define <vscale x 4 x i64> @clmulr_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) nounwind { +; RV32-LABEL: clmulr_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui s11, 1044480 +; RV32-NEXT: lui t6, 524288 +; RV32-NEXT: li a0, 1 +; RV32-NEXT: li ra, 2 +; RV32-NEXT: li t4, 4 +; RV32-NEXT: li t2, 8 +; RV32-NEXT: li t5, 16 +; RV32-NEXT: li t3, 32 +; RV32-NEXT: li t1, 64 +; RV32-NEXT: li t0, 128 +; RV32-NEXT: li a7, 256 +; RV32-NEXT: li a6, 512 +; RV32-NEXT: li a3, 1024 +; RV32-NEXT: lui a2, 1 +; RV32-NEXT: lui a4, 2 +; RV32-NEXT: lui a1, 4 +; RV32-NEXT: lui a5, 8 +; RV32-NEXT: lui s0, 16 +; RV32-NEXT: lui s1, 32 +; RV32-NEXT: lui s2, 64 +; RV32-NEXT: lui s3, 128 +; RV32-NEXT: lui s4, 256 +; RV32-NEXT: lui s5, 512 +; RV32-NEXT: lui s6, 1024 +; RV32-NEXT: lui s7, 2048 +; RV32-NEXT: lui s8, 4096 +; RV32-NEXT: lui s9, 8192 +; RV32-NEXT: lui s10, 16384 +; RV32-NEXT: sw s11, 272(sp) +; RV32-NEXT: lui s11, 32768 +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw t6, 264(sp) +; RV32-NEXT: sw zero, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a0, 260(sp) +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw ra, 252(sp) +; RV32-NEXT: lui ra, 65536 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw t4, 244(sp) +; RV32-NEXT: lui t4, 131072 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw t2, 236(sp) +; RV32-NEXT: lui t2, 262144 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw t5, 228(sp) +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw t3, 220(sp) +; RV32-NEXT: sw zero, 208(sp) +;
RV32-NEXT: sw t1, 212(sp) +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw t0, 204(sp) +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw a7, 196(sp) +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw a6, 188(sp) +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a3, 180(sp) +; RV32-NEXT: li t1, 1024 +; RV32-NEXT: slli a3, a0, 11 +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw a3, 172(sp) +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw a2, 164(sp) +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw a4, 156(sp) +; RV32-NEXT: lui t3, 2 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw a1, 148(sp) +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw a5, 140(sp) +; RV32-NEXT: lui t5, 8 +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s0, 132(sp) +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw s1, 124(sp) +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw s2, 116(sp) +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw s3, 108(sp) +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw s4, 100(sp) +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw s5, 92(sp) +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw s6, 84(sp) +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw s7, 76(sp) +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw s8, 68(sp) +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw s9, 60(sp) +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw s10, 52(sp) +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw s11, 44(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw ra, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: sw t4, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw t2, 20(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw t6, 12(sp) +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v28, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vmv.v.x v4, a1 +; RV32-NEXT: addi a1, sp, 272 +; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v0, (a1), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v0, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a6, 56 +; RV32-NEXT: vsrl.vi v20, v8, 24 +; RV32-NEXT: vsrl.vx v12, v8, a6 +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsrl.vx v16, v8, a5 +; RV32-NEXT: vsll.vx v24, v8, a6 +; RV32-NEXT: addi a2, s0, -256 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v16, v16, v12 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: vsll.vx v12, v12, a5 +; RV32-NEXT: vor.vv v12, v24, v12 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v20, v20, a4 +; RV32-NEXT: lui a7, 349525 +; RV32-NEXT: addi a7, a7, 1365 +; RV32-NEXT: vand.vv v24, v24, v0 +; RV32-NEXT: vor.vv v20, v24, v20 +; RV32-NEXT: vsetvli t0, zero, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v24, a7 +; RV32-NEXT: vsetvli a7, zero, e64, m4, ta, ma +; RV32-NEXT: vor.vv v16, v20, v16 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: addi a7, sp, 264 +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: addi a7, sp, 256 +; RV32-NEXT: vlse64.v v12, (a7), zero +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v28 +; RV32-NEXT: vand.vv v16, v16, v28 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v4 +; RV32-NEXT: vand.vv v16, v16, v4 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: addi a7, sp, 248 +; RV32-NEXT: vlse64.v v16, (a7), zero +; RV32-NEXT: vand.vv v28, v8, v20 +; RV32-NEXT: addi a7, sp, 240 +; RV32-NEXT: addi t0, sp, 232 +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: vand.vv v4, v8, v12 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a7, sp, 224 +; RV32-NEXT: addi t0, sp, 216 +; RV32-NEXT: addi a1, sp, 208 +; RV32-NEXT: addi a0, sp, 200 +; RV32-NEXT: vlse64.v v12, (a7), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: vlse64.v v20, (a1), zero +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr 
a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 192 +; RV32-NEXT: addi a1, sp, 184 +; RV32-NEXT: addi a7, sp, 176 +; RV32-NEXT: addi t0, sp, 168 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 160 +; RV32-NEXT: addi a1, sp, 152 +; RV32-NEXT: addi a7, sp, 144 +; RV32-NEXT: addi t0, sp, 136 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: addi a1, sp, 120 +; RV32-NEXT: addi a7, sp, 112 +; RV32-NEXT: addi t0, sp, 104 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 96 +; RV32-NEXT: addi a1, sp, 88 +; RV32-NEXT: addi a7, sp, 80 +; RV32-NEXT: addi t0, sp, 72 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; 
RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: addi a1, sp, 56 +; RV32-NEXT: addi a7, sp, 48 +; RV32-NEXT: addi t0, sp, 40 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: addi t0, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 
+; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vi v12, v8, 2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vi v12, v8, 1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vi v12, v8, 4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vi v12, v8, 8 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 16 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 64 +; RV32-NEXT: vand.vx 
v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 128 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 256 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 512 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: lui a0, 4 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add 
a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s7 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s8 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s9 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: 
add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s10 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s11 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, ra +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v16, v8, v28 +; RV32-NEXT: vmul.vv v20, v8, v4 +; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v28, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, 
vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, 
a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, 
a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 
+; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vi v8, v8, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: 
add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded 
Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: 
vxor.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vxor.vv v8, v8, v28 +; RV32-NEXT: vxor.vv v8, v8, v4 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, 
v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vsrl.vx v12, v8, a6 +; RV32-NEXT: vsrl.vx v16, v8, a5 +; RV32-NEXT: vsrl.vi v20, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, 
v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v20, v20, a4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vsll.vx v24, v8, a6 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vsll.vx v8, v8, a5 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v8, v8, v20 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsrl.vi v12, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_nxv4i64: +; RV64: # 
%bb.0: +; RV64-NEXT: addi sp, sp, -240 +; RV64-NEXT: sd ra, 232(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 224(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 216(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 208(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 200(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 192(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 184(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 176(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 168(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 160(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 152(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: li t0, 40 +; RV64-NEXT: lui a7, 16 +; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64-NEXT: vsrl.vi v20, v8, 24 +; RV64-NEXT: vsrl.vi v12, v8, 8 +; RV64-NEXT: li t2, 255 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: lui a5, 349525 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: li s9, 1 +; RV64-NEXT: li a6, 56 +; RV64-NEXT: vsrl.vx v16, v8, a6 +; RV64-NEXT: vsrl.vx v28, v8, t0 +; RV64-NEXT: addi t6, a7, -256 +; RV64-NEXT: lui a7, 4080 +; RV64-NEXT: vand.vx v24, v20, a7 +; RV64-NEXT: slli t2, t2, 24 +; RV64-NEXT: vand.vx v4, v8, a7 +; RV64-NEXT: vsll.vx v20, v8, a6 +; RV64-NEXT: addi a7, a3, -241 +; RV64-NEXT: addi a6, a4, 819 +; RV64-NEXT: addi a5, a5, 1365 +; RV64-NEXT: slli a3, s9, 11 +; RV64-NEXT: sd a3, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 31 +; RV64-NEXT: sd a3, 104(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 32 +; RV64-NEXT: sd a3, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 33 +; RV64-NEXT: sd a3, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 34 +; RV64-NEXT: sd a3, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 35 +; RV64-NEXT: sd a3, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 36 +; RV64-NEXT: sd a3, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 37 +; RV64-NEXT: sd a3, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 38 +; RV64-NEXT: sd a3, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 39 +; RV64-NEXT: sd a3, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 40 +; RV64-NEXT: sd a3, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 41 +; RV64-NEXT: sd a3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s6, s9, 42 +; RV64-NEXT: slli s7, s9, 43 +; RV64-NEXT: slli a3, a7, 32 +; RV64-NEXT: add a7, a7, a3 +; RV64-NEXT: slli a3, a6, 32 +; RV64-NEXT: add a6, a6, a3 +; RV64-NEXT: slli a3, a5, 32 +; RV64-NEXT: add a5, a5, a3 +; RV64-NEXT: slli s8, s9, 44 +; RV64-NEXT: vand.vx v28, v28, t6 +; RV64-NEXT: vand.vx v12, v12, t2 +; RV64-NEXT: vsll.vi v4, v4, 24 +; RV64-NEXT: vand.vx v0, v8, t2 +; RV64-NEXT: vand.vx v8, v8, t6 +; RV64-NEXT: vor.vv v16, v28, v16 +; RV64-NEXT: vor.vv v12, v12, v24 +; RV64-NEXT: vsll.vi v24, v0, 8 +; RV64-NEXT: vsll.vx v8, v8, t0 +; RV64-NEXT: vor.vv v12, v12, v16 +; RV64-NEXT: vor.vv v16, v4, v24 +; RV64-NEXT: vor.vv v8, v20, v8 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: vand.vx 
v8, v8, a7 +; RV64-NEXT: vand.vx v12, v12, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v12, v12, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v12, v12, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vand.vx v12, v8, a2 +; RV64-NEXT: slli s10, s9, 45 +; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: slli s11, s9, 46 +; RV64-NEXT: vand.vx v20, v8, a0 +; RV64-NEXT: slli ra, s9, 47 +; RV64-NEXT: slli s4, s9, 48 +; RV64-NEXT: slli s3, s9, 49 +; RV64-NEXT: slli s2, s9, 50 +; RV64-NEXT: slli s1, s9, 51 +; RV64-NEXT: slli s0, s9, 52 +; RV64-NEXT: slli t5, s9, 53 +; RV64-NEXT: slli t4, s9, 54 +; RV64-NEXT: slli t3, s9, 55 +; RV64-NEXT: slli t1, s9, 56 +; RV64-NEXT: slli t0, s9, 57 +; RV64-NEXT: slli a4, s9, 58 +; RV64-NEXT: slli a3, s9, 59 +; RV64-NEXT: slli a2, s9, 60 +; RV64-NEXT: slli a1, s9, 61 +; RV64-NEXT: slli s9, s9, 62 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vand.vi v24, v8, 2 +; RV64-NEXT: vand.vi v28, v8, 1 +; RV64-NEXT: vand.vi v4, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v24, v8, v24 +; RV64-NEXT: vmul.vv v28, v8, v28 +; RV64-NEXT: sd a5, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v28, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v28, v8, v4 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v28, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v20 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: li s5, 128 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; 
RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: li s5, 256 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: li s5, 512 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: li s5, 1024 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 1 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 6 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 2 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 4 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 8 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: 
vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 16 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 32 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 64 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 128 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 256 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 512 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 1024 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 2048 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; 
RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 8192 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 16384 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 32768 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 65536 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 7 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 131072 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 262144 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 96(sp) # 
8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded 
Spill +; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv a5, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: ld a5, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s7 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s8 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s10 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s11 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, ra +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s4 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s4, vlenb +; RV64-NEXT: slli s4, s4, 3 +; RV64-NEXT: mv s5, s4 +; RV64-NEXT: slli s4, s4, 2 +; RV64-NEXT: add s4, s4, s5 +; RV64-NEXT: add s4, sp, s4 +; RV64-NEXT: addi s4, s4, 128 +; RV64-NEXT: vs4r.v v12, (s4) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s3, vlenb +; RV64-NEXT: slli s3, s3, 2 +; RV64-NEXT: mv s4, s3 +; RV64-NEXT: slli s3, s3, 2 +; RV64-NEXT: add s3, s3, s4 +; RV64-NEXT: add s3, sp, s3 +; RV64-NEXT: addi 
s3, s3, 128 +; RV64-NEXT: vs4r.v v12, (s3) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s2 +; RV64-NEXT: vmul.vv v4, v8, v12 +; RV64-NEXT: vand.vx v12, v8, s1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s1, vlenb +; RV64-NEXT: slli s1, s1, 2 +; RV64-NEXT: mv s2, s1 +; RV64-NEXT: slli s1, s1, 2 +; RV64-NEXT: add s2, s2, s1 +; RV64-NEXT: slli s1, s1, 1 +; RV64-NEXT: add s1, s1, s2 +; RV64-NEXT: add s1, sp, s1 +; RV64-NEXT: addi s1, s1, 128 +; RV64-NEXT: vs4r.v v12, (s1) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s0 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s0, vlenb +; RV64-NEXT: slli s0, s0, 2 +; RV64-NEXT: mv s1, s0 +; RV64-NEXT: slli s0, s0, 1 +; RV64-NEXT: add s1, s1, s0 +; RV64-NEXT: slli s0, s0, 2 +; RV64-NEXT: add s0, s0, s1 +; RV64-NEXT: add s0, sp, s0 +; RV64-NEXT: addi s0, s0, 128 +; RV64-NEXT: vs4r.v v12, (s0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t5, vlenb +; RV64-NEXT: slli t5, t5, 2 +; RV64-NEXT: mv s0, t5 +; RV64-NEXT: slli t5, t5, 3 +; RV64-NEXT: add t5, t5, s0 +; RV64-NEXT: add t5, sp, t5 +; RV64-NEXT: addi t5, t5, 128 +; RV64-NEXT: vs4r.v v12, (t5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t4 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t4, vlenb +; RV64-NEXT: slli t4, t4, 4 +; RV64-NEXT: add t4, sp, t4 +; RV64-NEXT: addi t4, t4, 128 +; RV64-NEXT: vs4r.v v12, (t4) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: slli t3, t3, 2 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 128 +; RV64-NEXT: vs4r.v v12, (t3) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t1 +; RV64-NEXT: vmul.vv v20, v8, v12 +; RV64-NEXT: vand.vx v12, v8, t0 +; RV64-NEXT: vmul.vv v16, v8, v12 +; RV64-NEXT: vand.vx v12, v8, a4 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 128 +; RV64-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: mv a4, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a4, a4, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vs4r.v v12, (a3) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vs4r.v v12, (a2) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv a2, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s9 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a0 +; RV64-NEXT: vmul.vv v8, v8, v12 +; RV64-NEXT: addi a0, sp, 128 +; 
RV64-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v12 +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; 
RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 
32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v12, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vand.vx v12, v12, t6 +; RV64-NEXT: li a0, 40 +; RV64-NEXT: vsll.vx v12, v12, a0 +; RV64-NEXT: vor.vv v12, v8, v12 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v0, v8 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 7 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale 
x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; 
RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 6 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v0, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v28, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v28 +; RV64-NEXT: vxor.vv v24, v24, v4 +; RV64-NEXT: vsrl.vi v4, v8, 8 +; RV64-NEXT: vand.vx v4, v4, t2 +; RV64-NEXT: vsrl.vi v0, v0, 24 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: 
vand.vx v0, v0, a2 +; RV64-NEXT: vor.vv v4, v4, v0 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: mv a4, a3 +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: add a4, a4, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl4r.v v0, (a3) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: mv a4, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a4, a4, a3 +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl4r.v v0, (a3) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: mv a4, a3 +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl4r.v v0, (a3) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl4r.v v28, (a3) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v28 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl4r.v v28, (a3) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v28 +; RV64-NEXT: vxor.vv v20, v24, v20 +; RV64-NEXT: vxor.vv v16, v20, v16 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vsll.vi v8, v8, 24 +; RV64-NEXT: vand.vx v20, v24, t2 +; RV64-NEXT: vsll.vi v20, v20, 8 +; RV64-NEXT: vor.vv v8, v8, v20 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v20, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v12, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v16, v12 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl4r.v v16, (a2) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v16 +; RV64-NEXT: vsrl.vx v16, v24, a0 +; RV64-NEXT: vand.vx v16, v16, t6 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a2, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v20 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v20 +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v12, v12, v20 +; RV64-NEXT: vsrl.vx v12, v12, a1 +; RV64-NEXT: vor.vv v12, v16, v12 +; RV64-NEXT: vor.vv v12, v4, v12 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: 
vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v12, v12, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v12, v12, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v12, v12, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 232(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 224(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 216(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 208(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 200(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 192(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 184(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 176(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 168(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 160(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 152(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 144(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 240 +; RV64-NEXT: ret + %a = call <vscale x 4 x i64> @llvm.clmulr.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) + ret <vscale x 4 x i64> %a +} + +define <vscale x 8 x i64> @clmulr_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) nounwind { +; RV32-LABEL: clmulr_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui s11, 1044480 +; RV32-NEXT: lui s0, 524288 +; RV32-NEXT: li a0, 1 +; RV32-NEXT: li ra, 2 +; RV32-NEXT: li t4, 4 +; RV32-NEXT: li t2, 8 +; RV32-NEXT: li t6, 16 +; RV32-NEXT: li t5, 32 +; RV32-NEXT: li t3, 64 +; RV32-NEXT: li t1, 128 +; RV32-NEXT: li t0, 256 +; RV32-NEXT: li a7, 512 +; RV32-NEXT: li a6, 1024 +; RV32-NEXT: lui a4, 1 +; RV32-NEXT: lui a3, 2 +; RV32-NEXT: lui a2, 4 +; RV32-NEXT: lui a5, 8 +; RV32-NEXT: lui s1, 16 +; RV32-NEXT: lui a1, 32 +; RV32-NEXT: lui s2, 64 +; RV32-NEXT: lui s3, 128 +; RV32-NEXT: lui s4, 256 +; RV32-NEXT: lui s5, 512 +; RV32-NEXT: lui s6, 1024 +; RV32-NEXT: lui s7, 2048 +; RV32-NEXT: lui s8, 4096 +; RV32-NEXT: lui s9, 8192 +; RV32-NEXT: lui s10, 16384 +; RV32-NEXT: sw s11, 272(sp) +; RV32-NEXT: lui s11, 32768 +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw s0, 264(sp) +; RV32-NEXT: sw zero, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a0, 260(sp) +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw ra, 252(sp) +; RV32-NEXT: lui ra, 65536 +; 
RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw t4, 244(sp) +; RV32-NEXT: lui t4, 131072 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw t2, 236(sp) +; RV32-NEXT: lui t2, 262144 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw t6, 228(sp) +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw t5, 220(sp) +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw t3, 212(sp) +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw t1, 204(sp) +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw t0, 196(sp) +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw a7, 188(sp) +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a6, 180(sp) +; RV32-NEXT: li t1, 1024 +; RV32-NEXT: slli t6, a0, 11 +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw t6, 172(sp) +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw a4, 164(sp) +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw a3, 156(sp) +; RV32-NEXT: lui t3, 2 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw a2, 148(sp) +; RV32-NEXT: lui t5, 4 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw a5, 140(sp) +; RV32-NEXT: lui a4, 8 +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s1, 132(sp) +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw a1, 124(sp) +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw s2, 116(sp) +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw s3, 108(sp) +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw s4, 100(sp) +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw s5, 92(sp) +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw s6, 84(sp) +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw s7, 76(sp) +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw s8, 68(sp) +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw s9, 60(sp) +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw s10, 52(sp) +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw s11, 44(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw ra, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: sw t4, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw t2, 20(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw s0, 12(sp) +; RV32-NEXT: li a6, 56 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v8, a6 +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsrl.vx v24, v8, a5 +; RV32-NEXT: vsll.vx v0, v8, a6 +; RV32-NEXT: addi a2, s1, -256 +; RV32-NEXT: vand.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: vsll.vx v24, v24, a5 +; RV32-NEXT: vor.vv v16, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a3, sp, 272 +; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: vand.vx v16, v0, a3 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vmv8r.v v0, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vor.vv v24, v24, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vor.vv v16, v24, v8 +; RV32-NEXT: vand.vx v24, v0, a3 +; RV32-NEXT: vsll.vi v24, v24, 24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v0, v0, v8 +; RV32-NEXT: vsll.vi v0, v0, 8 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: lui a7, 61681 +; RV32-NEXT: addi a7, a7, -241 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsetvli t0, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui a7, 209715 +; RV32-NEXT: addi a7, a7, 819 +; RV32-NEXT: vsetvli t0, zero, e64, m8, ta, ma +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsetvli t0, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vsetvli a7, zero, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v16, v16, v8 +; RV32-NEXT: lui a7, 349525 +; RV32-NEXT: addi a7, a7, 1365 +; RV32-NEXT: vsetvli t0, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a7 +; RV32-NEXT: vsetvli a7, zero, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v16, 1 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vmv8r.v v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 9 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a7, sp, 264 +; RV32-NEXT: vlse64.v v24, (a7), zero +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v16, v16, v16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, 
a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a7, sp, 256 +; RV32-NEXT: addi t0, sp, 248 +; RV32-NEXT: addi a1, sp, 240 +; RV32-NEXT: addi a0, sp, 232 +; RV32-NEXT: vlse64.v v16, (a7), zero +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 4 +; RV32-NEXT: mv s0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add s0, s0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add s0, s0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add s0, s0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add a7, a7, s0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs8r.v v16, (a7) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (t0), zero +; RV32-NEXT: vlse64.v v0, (a1), zero +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi 
a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a0, sp, 224 +; RV32-NEXT: addi a1, sp, 216 +; RV32-NEXT: addi a7, sp, 208 +; RV32-NEXT: addi t0, sp, 200 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v0, (a7), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; 
RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a0, sp, 192 +; RV32-NEXT: addi a1, sp, 184 +; RV32-NEXT: addi a7, sp, 176 +; RV32-NEXT: addi t0, sp, 168 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v0, (a7), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; 
RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a0, sp, 160 +; RV32-NEXT: addi a1, sp, 152 +; RV32-NEXT: addi a7, sp, 144 +; RV32-NEXT: addi t0, sp, 136 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v0, (a7), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add 
a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: addi a1, sp, 120 +; RV32-NEXT: addi a7, sp, 112 +; RV32-NEXT: addi t0, sp, 104 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v0, (a7), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a0, sp, 
96 +; RV32-NEXT: addi a1, sp, 88 +; RV32-NEXT: addi a7, sp, 80 +; RV32-NEXT: addi t0, sp, 72 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v0, (a7), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: addi a1, sp, 56 +; RV32-NEXT: addi a7, sp, 48 +; RV32-NEXT: addi t0, sp, 40 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s0, s0, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill 
+; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v0, (a7), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: addi t0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v0, (a7), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vi v16, v8, 2 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vi v16, v8, 1 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vi v16, v8, 4 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vi v16, v8, 8 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: li a0, 16 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, 
sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: li a0, 64 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: li a0, 128 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: li a0, 256 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: li a0, 512 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t1 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t6 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t3 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # 
vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t5 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s1 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: lui a0, 32 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s2 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s3 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s4 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s5 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s6 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s7 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded 
Spill +; RV32-NEXT: vand.vx v16, v8, s8 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s9 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s10 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, s11 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, ra +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t4 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vand.vx v16, v8, t2 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, 
a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, 
a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, 
a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, 
a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: 
vmul.vv v16, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vi v8, v8, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add 
a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, 
vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: 
csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, 
vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, 
vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 
+; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v16, v8, a5 +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vsrl.vx v24, v8, a6 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vsll.vi v0, v0, 8 +; RV32-NEXT: vand.vx v16, v8, a3 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vsll.vx v0, v8, a6 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vsll.vx v8, v8, a5 +; RV32-NEXT: vor.vv v8, v0, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; 
RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 9 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -240 +; RV64-NEXT: sd ra, 232(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 224(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 216(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 208(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 200(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 192(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 184(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 176(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 168(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 160(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 152(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: li a2, 40 +; RV64-NEXT: lui a3, 16 +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64-NEXT: vsrl.vi v24, v8, 24 +; RV64-NEXT: vsrl.vx v16, v8, a1 +; RV64-NEXT: li a5, 56 +; RV64-NEXT: vsrl.vx v0, v8, a2 +; RV64-NEXT: li s5, 40 +; RV64-NEXT: addi s4, a3, -256 +; RV64-NEXT: vand.vx v0, v0, s4 +; RV64-NEXT: vor.vv v16, v0, v16 +; RV64-NEXT: vsrl.vi v0, v8, 8 +; RV64-NEXT: li a4, 255 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 349525 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: lui a6, 
4080 +; RV64-NEXT: vand.vx v24, v24, a6 +; RV64-NEXT: slli a4, a4, 24 +; RV64-NEXT: vand.vx v0, v0, a4 +; RV64-NEXT: vor.vv v24, v0, v24 +; RV64-NEXT: vand.vx v0, v8, a6 +; RV64-NEXT: vsll.vi v0, v0, 24 +; RV64-NEXT: vor.vv v16, v24, v16 +; RV64-NEXT: vand.vx v24, v8, a4 +; RV64-NEXT: vsll.vi v24, v24, 8 +; RV64-NEXT: vor.vv v24, v0, v24 +; RV64-NEXT: vsll.vx v0, v8, a5 +; RV64-NEXT: addi a7, a1, -241 +; RV64-NEXT: addi a6, a2, 819 +; RV64-NEXT: addi a5, a3, 1365 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 37 +; RV64-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a1, a0, 41 +; RV64-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s6, a0, 42 +; RV64-NEXT: slli s7, a0, 43 +; RV64-NEXT: slli s8, a0, 44 +; RV64-NEXT: slli s9, a0, 45 +; RV64-NEXT: slli s10, a0, 46 +; RV64-NEXT: slli a1, a7, 32 +; RV64-NEXT: add a7, a7, a1 +; RV64-NEXT: slli a1, a6, 32 +; RV64-NEXT: add a6, a6, a1 +; RV64-NEXT: slli a1, a5, 32 +; RV64-NEXT: add a5, a5, a1 +; RV64-NEXT: slli s11, a0, 47 +; RV64-NEXT: slli ra, a0, 48 +; RV64-NEXT: slli s3, a0, 49 +; RV64-NEXT: slli s2, a0, 50 +; RV64-NEXT: slli s1, a0, 51 +; RV64-NEXT: slli s0, a0, 52 +; RV64-NEXT: slli t6, a0, 53 +; RV64-NEXT: slli t5, a0, 54 +; RV64-NEXT: slli t4, a0, 55 +; RV64-NEXT: slli t3, a0, 56 +; RV64-NEXT: slli t2, a0, 57 +; RV64-NEXT: slli t1, a0, 58 +; RV64-NEXT: slli t0, a0, 59 +; RV64-NEXT: slli a3, a0, 60 +; RV64-NEXT: slli a2, a0, 61 +; RV64-NEXT: slli a1, a0, 62 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vand.vx v8, v8, s4 +; RV64-NEXT: vsll.vx v8, v8, s5 +; RV64-NEXT: vor.vv v8, v0, v8 +; RV64-NEXT: vor.vv v8, v8, v24 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v16, v16, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v16, v16, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v16, v16, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vand.vi v16, v8, 2 +; RV64-NEXT: vand.vi v24, v8, 1 +; RV64-NEXT: vand.vi v0, v8, 4 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 
64-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v24 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vand.vi v16, v8, 8 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: li s5, 16 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: li s5, 32 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: li s5, 64 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: li s5, 128 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: li s5, 256 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: li s5, 512 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: li s5, 1024 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb 
+; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 1 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 2 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 4 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 8 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 7 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 16 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 32 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 64 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; 
RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 128 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 256 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 512 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 1024 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 2048 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 8192 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 16384 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, 
a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 32768 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 65536 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 131072 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: lui s5, 262144 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 8 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 
64-byte Folded Spill +; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: mv s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: mv a1, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add a1, a1, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, s5, a1 +; RV64-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s6 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; 
RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s7 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 5 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s8 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s9 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s10 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s11 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, ra +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s3 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s3, vlenb +; RV64-NEXT: slli s3, s3, 3 +; RV64-NEXT: mv s5, s3 +; RV64-NEXT: slli s3, s3, 3 +; RV64-NEXT: add s3, s3, s5 +; RV64-NEXT: add s3, sp, s3 +; RV64-NEXT: addi s3, s3, 128 +; RV64-NEXT: vs8r.v v16, (s3) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s2 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s2, vlenb +; RV64-NEXT: slli s2, s2, 4 +; RV64-NEXT: mv s3, s2 +; RV64-NEXT: slli s2, s2, 1 +; RV64-NEXT: add s2, s2, s3 +; RV64-NEXT: add s2, sp, s2 +; RV64-NEXT: addi s2, s2, 128 +; RV64-NEXT: vs8r.v v16, (s2) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s1 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s1, vlenb +; RV64-NEXT: slli s1, s1, 7 +; RV64-NEXT: add s1, sp, s1 +; RV64-NEXT: addi s1, s1, 128 +; RV64-NEXT: vs8r.v v16, (s1) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, s0 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr s0, vlenb +; RV64-NEXT: slli s0, s0, 4 +; RV64-NEXT: mv s1, s0 +; RV64-NEXT: slli s0, s0, 1 +; RV64-NEXT: add s1, s1, s0 +; RV64-NEXT: slli s0, s0, 1 +; RV64-NEXT: add s0, s0, s1 +; RV64-NEXT: add s0, sp, s0 +; RV64-NEXT: addi s0, s0, 128 +; RV64-NEXT: vs8r.v v16, 
(s0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t6 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 5 +; RV64-NEXT: mv s0, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s0 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 128 +; RV64-NEXT: vs8r.v v16, (t6) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t5 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr t5, vlenb +; RV64-NEXT: slli t5, t5, 6 +; RV64-NEXT: add t5, sp, t5 +; RV64-NEXT: addi t5, t5, 128 +; RV64-NEXT: vs8r.v v16, (t5) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t4 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr t4, vlenb +; RV64-NEXT: slli t4, t4, 3 +; RV64-NEXT: mv t5, t4 +; RV64-NEXT: slli t4, t4, 2 +; RV64-NEXT: add t4, t4, t5 +; RV64-NEXT: add t4, sp, t4 +; RV64-NEXT: addi t4, t4, 128 +; RV64-NEXT: vs8r.v v16, (t4) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t3 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: slli t3, t3, 3 +; RV64-NEXT: mv t4, t3 +; RV64-NEXT: slli t3, t3, 1 +; RV64-NEXT: add t3, t3, t4 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 128 +; RV64-NEXT: vs8r.v v16, (t3) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t2 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr t2, vlenb +; RV64-NEXT: slli t2, t2, 3 +; RV64-NEXT: add t2, sp, t2 +; RV64-NEXT: addi t2, t2, 128 +; RV64-NEXT: vs8r.v v16, (t2) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t1 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr t1, vlenb +; RV64-NEXT: slli t1, t1, 3 +; RV64-NEXT: mv t2, t1 +; RV64-NEXT: slli t1, t1, 1 +; RV64-NEXT: add t2, t2, t1 +; RV64-NEXT: slli t1, t1, 2 +; RV64-NEXT: add t1, t1, t2 +; RV64-NEXT: add t1, sp, t1 +; RV64-NEXT: addi t1, t1, 128 +; RV64-NEXT: vs8r.v v16, (t1) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t0 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr t0, vlenb +; RV64-NEXT: slli t0, t0, 4 +; RV64-NEXT: mv t1, t0 +; RV64-NEXT: slli t0, t0, 2 +; RV64-NEXT: add t0, t0, t1 +; RV64-NEXT: add t0, sp, t0 +; RV64-NEXT: addi t0, t0, 128 +; RV64-NEXT: vs8r.v v16, (t0) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, a3 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: mv t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add t0, t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a3, a3, t0 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vs8r.v v16, (a3) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, a2 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 128 +; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, a0 +; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, 
a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v24, v8 +; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v 
v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 7 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr 
a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v16, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsll.vx v8, v8, a0 +; RV64-NEXT: vand.vx v16, v16, s4 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsll.vx v16, v16, a1 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v24, v8 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: 
slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 8 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 
3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 6 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v16, v8, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v16, v8 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 
1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v8, v24 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vsrl.vi v0, v16, 8 +; RV64-NEXT: vand.vx v0, v0, a4 +; RV64-NEXT: vsrl.vi v8, v8, 24 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vor.vv v8, v0, v8 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 5 +; RV64-NEXT: mv t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add t0, t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add t0, t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a3, a3, t0 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 7 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl8r.v v8, (a3) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v24, v8 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: mv t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add t0, t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a3, a3, t0 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 5 +; RV64-NEXT: mv t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a3, a3, t0 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 6 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: mv t0, a3 +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: add a3, a3, t0 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v8, v8, v24 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: mv t0, a3 +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a3, a3, t0 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v8, v24 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 128 +; RV64-NEXT: vl8r.v v0, (a3) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vand.vx v16, v16, a2 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: vand.vx v0, v8, a4 +; RV64-NEXT: vsll.vi v0, v0, 8 +; RV64-NEXT: vor.vv v16, v16, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, 
a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vor.vv v16, v0, v16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: mv a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a3, a3, a2 +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: addi a2, sp, 128 +; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload +; RV64-NEXT: vxor.vv v24, v24, v0 +; RV64-NEXT: vsrl.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, s4 +; RV64-NEXT: vsrl.vx v24, v24, a0 +; RV64-NEXT: vor.vv v8, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload +; RV64-NEXT: vor.vv v8, v24, v8 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v16, v16, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v16, v16, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vsrl.vi v16, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v16, v16, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 232(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 224(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 216(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 208(sp) # 8-byte Folded Reload +; RV64-NEXT: 
ld s3, 200(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s4, 192(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s5, 184(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s6, 176(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s7, 168(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s8, 160(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s9, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s10, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s11, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 240
+; RV64-NEXT: ret
+  %a = call <vscale x 8 x i64> @llvm.clmulr.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
+  ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
new file mode 100644
index 0000000000000..1c00086064133
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
@@ -0,0 +1,19366 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <1 x i32> @clmul_v1i32(<1 x i32> %x, <1 x i32> %y) nounwind {
+; CHECK-LABEL: clmul_v1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vand.vi v10, v9, 2
+; CHECK-NEXT: vand.vi v11, v9, 1
+; CHECK-NEXT: vmul.vv v10, v8, v10
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v11, v10
+; CHECK-NEXT: vand.vi v11, v9, 4
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vi v11, v9, 8
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: li a0, 256
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: li a0, 512
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: li a0, 1024
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: slli a0, a0, 11
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: lui a0, 4
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: lui a0, 32
+; CHECK-NEXT: vmul.vv v11, v8, v11
+; CHECK-NEXT: vxor.vv v10, v10, v11
+; CHECK-NEXT: vand.vx v11, v9, a0
+; CHECK-NEXT: lui a0, 64
+; CHECK-NEXT:
vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vxor.vv v8, v10, v8 +; CHECK-NEXT: ret + %a = call <1 x i32> @llvm.clmul.v1i32(<1 x i32> %x, <1 x i32> %y) + ret <1 x i32> %a +} + +define <2 x i32> @clmul_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind { +; CHECK-LABEL: clmul_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 2 +; CHECK-NEXT: vand.vi v11, v9, 1 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v11, v10 +; CHECK-NEXT: vand.vi v11, v9, 4 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vi v11, v9, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v11, v8, 
v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vxor.vv v8, v10, v8 +; CHECK-NEXT: ret + %a = call <2 x i32> @llvm.clmul.v2i32(<2 x i32> %x, <2 x i32> %y) + ret <2 x i32> %a +} + +define <4 x i32> @clmul_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { +; CHECK-LABEL: clmul_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vand.vi v10, v9, 2 +; CHECK-NEXT: vand.vi v11, v9, 1 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v11, v10 +; CHECK-NEXT: vand.vi v11, v9, 4 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vi v11, v9, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; 
CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: 
vand.vx v11, v9, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vxor.vv v10, v10, v11 +; CHECK-NEXT: vmul.vv v8, v8, v9 +; CHECK-NEXT: vxor.vv v8, v10, v8 +; CHECK-NEXT: ret + %a = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %x, <4 x i32> %y) + ret <4 x i32> %a +} + +define <8 x i32> @clmul_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { +; CHECK-LABEL: clmul_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vand.vi v10, v8, 2 +; CHECK-NEXT: vand.vi v12, v8, 1 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v12, v10 +; CHECK-NEXT: vand.vi v12, v8, 4 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vi v12, v8, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 32 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 
4096 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vxor.vv v10, v10, v12 +; CHECK-NEXT: vand.vx v12, v8, a0 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vxor.vv v8, v10, v8 +; CHECK-NEXT: ret + %a = call <8 x i32> @llvm.clmul.v8i32(<8 x i32> %x, <8 x i32> %x) + ret <8 x i32> %a +} + +define <16 x i32> @clmul_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { +; CHECK-LABEL: clmul_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vand.vi v16, v12, 2 +; CHECK-NEXT: vand.vi v20, v12, 1 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v20, v16 +; CHECK-NEXT: vand.vi v20, v12, 4 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vi v20, v12, 8 +; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 32 
+; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 64 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 128 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 256 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 512 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 1024 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 2048 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 4096 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 8192 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 16384 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 32768 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 65536 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 131072 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 262144 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vand.vx v20, v12, a0 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vxor.vv v16, v16, v20 +; CHECK-NEXT: vmul.vv v8, v8, v12 +; CHECK-NEXT: vxor.vv v8, v16, v8 +; CHECK-NEXT: ret + %a = call <16 x i32> @llvm.clmul.v16i32(<16 x i32> %x, <16 x i32> %y) + ret <16 x i32> %a +} + +define <1 x i64> @clmul_v1i64(<1 x i64> %x, <1 x i64> %y) nounwind { +; RV32-LABEL: clmul_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: li t5, 1 +; RV32-NEXT: li a4, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li s11, 8 +; RV32-NEXT: li a0, 16 +; RV32-NEXT: li ra, 32 +; RV32-NEXT: li s10, 64 +; RV32-NEXT: li s9, 128 +; RV32-NEXT: li s8, 256 +; RV32-NEXT: li s7, 512 +; RV32-NEXT: li s1, 1024 +; RV32-NEXT: lui s6, 1 +; RV32-NEXT: lui s5, 2 +; RV32-NEXT: lui s4, 4 +; RV32-NEXT: lui s3, 8 +; RV32-NEXT: 
lui s2, 16 +; RV32-NEXT: lui s0, 32 +; RV32-NEXT: lui t6, 64 +; RV32-NEXT: lui t4, 128 +; RV32-NEXT: lui t3, 256 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: lui t1, 1024 +; RV32-NEXT: lui t0, 2048 +; RV32-NEXT: lui a7, 4096 +; RV32-NEXT: lui a6, 8192 +; RV32-NEXT: lui a5, 16384 +; RV32-NEXT: lui a3, 32768 +; RV32-NEXT: sw a1, 272(sp) +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw zero, 264(sp) +; RV32-NEXT: sw t5, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a4, 260(sp) +; RV32-NEXT: lui a4, 65536 +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw a2, 252(sp) +; RV32-NEXT: lui a2, 131072 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s11, 244(sp) +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vand.vi v13, v9, 2 +; RV32-NEXT: vand.vi v14, v9, 1 +; RV32-NEXT: vand.vi v12, v9, 4 +; RV32-NEXT: vand.vi v11, v9, 8 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw a0, 236(sp) +; RV32-NEXT: vand.vx v10, v9, a0 +; RV32-NEXT: addi s11, sp, 272 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw ra, 228(sp) +; RV32-NEXT: vand.vx v15, v9, ra +; RV32-NEXT: addi ra, sp, 264 +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw s10, 220(sp) +; RV32-NEXT: vand.vx v16, v9, s10 +; RV32-NEXT: addi s10, sp, 256 +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw s9, 212(sp) +; RV32-NEXT: vand.vx v17, v9, s9 +; RV32-NEXT: addi s9, sp, 248 +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw s8, 204(sp) +; RV32-NEXT: vand.vx v18, v9, s8 +; RV32-NEXT: addi s8, sp, 240 +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s7, 196(sp) +; RV32-NEXT: vand.vx v19, v9, s7 +; RV32-NEXT: addi s7, sp, 232 +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw s1, 188(sp) +; RV32-NEXT: vand.vx v20, v9, s1 +; RV32-NEXT: slli t5, t5, 11 +; RV32-NEXT: vand.vx v21, v9, s6 +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw t5, 180(sp) +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw s6, 172(sp) +; RV32-NEXT: addi s6, sp, 216 +; RV32-NEXT: vand.vx v22, v9, s5 +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw s5, 164(sp) +; RV32-NEXT: addi s5, sp, 208 +; RV32-NEXT: vand.vx v23, v9, s4 +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw s4, 156(sp) +; RV32-NEXT: addi s4, sp, 200 +; RV32-NEXT: vand.vx v24, v9, s3 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw s3, 148(sp) +; RV32-NEXT: addi s3, sp, 192 +; RV32-NEXT: vand.vx v25, v9, s2 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw s2, 140(sp) +; RV32-NEXT: addi s2, sp, 184 +; RV32-NEXT: vand.vx v26, v9, s0 +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s0, 132(sp) +; RV32-NEXT: addi s1, sp, 176 +; RV32-NEXT: vand.vx v27, v9, t6 +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t6, 124(sp) +; RV32-NEXT: addi s0, sp, 168 +; RV32-NEXT: vand.vx v28, v9, t4 +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t4, 116(sp) +; RV32-NEXT: addi t6, sp, 160 +; RV32-NEXT: vand.vx v29, v9, t3 +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t3, 108(sp) +; RV32-NEXT: addi t4, sp, 152 +; RV32-NEXT: vand.vx v30, v9, t2 +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw t2, 100(sp) +; RV32-NEXT: addi t3, sp, 144 +; RV32-NEXT: vand.vx v31, v9, t1 +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw t1, 92(sp) +; RV32-NEXT: addi t2, sp, 136 +; RV32-NEXT: vand.vx v7, v9, t0 +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw t0, 84(sp) +; RV32-NEXT: addi t1, sp, 128 +; RV32-NEXT: vand.vx v6, v9, a7 +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw a7, 76(sp) +; RV32-NEXT: addi t0, sp, 120 +; RV32-NEXT: vand.vx v5, v9, a6 +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw a6, 68(sp) +; RV32-NEXT: addi a7, sp, 112 +; RV32-NEXT: vand.vx v4, v9, a5 +; 
RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw a5, 60(sp) +; RV32-NEXT: addi a6, sp, 104 +; RV32-NEXT: vand.vx v3, v9, a3 +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw a3, 52(sp) +; RV32-NEXT: addi a5, sp, 96 +; RV32-NEXT: vand.vx v2, v9, a4 +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw a4, 44(sp) +; RV32-NEXT: addi a4, sp, 88 +; RV32-NEXT: vand.vx v1, v9, a2 +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: addi a3, sp, 80 +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: lui a0, 262144 +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: addi a2, sp, 72 +; RV32-NEXT: vand.vx v0, v9, t5 +; RV32-NEXT: addi a1, sp, 64 +; RV32-NEXT: vmul.vv v13, v8, v13 +; RV32-NEXT: vmul.vv v14, v8, v14 +; RV32-NEXT: vxor.vi v14, v14, 0 +; RV32-NEXT: vxor.vv v14, v14, v13 +; RV32-NEXT: vlse64.v v13, (s11), zero +; RV32-NEXT: addi s11, sp, 56 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v14, v14, v12 +; RV32-NEXT: vlse64.v v12, (ra), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv ra, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, ra +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs1r.v v12, (t5) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi ra, sp, 48 +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v14, v14, v11 +; RV32-NEXT: vlse64.v v11, (s10), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli s10, t5, 2 +; RV32-NEXT: add t5, s10, t5 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs1r.v v11, (t5) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi s10, sp, 40 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v14, v14, v10 +; RV32-NEXT: vlse64.v v10, (s9), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs1r.v v10, (t5) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi t5, sp, 32 +; RV32-NEXT: vmul.vv v15, v8, v15 +; RV32-NEXT: vxor.vv v15, v14, v15 +; RV32-NEXT: vlse64.v v10, (s8), zero +; RV32-NEXT: csrr s8, vlenb +; RV32-NEXT: slli s9, s8, 1 +; RV32-NEXT: add s8, s9, s8 +; RV32-NEXT: add s8, sp, s8 +; RV32-NEXT: addi s8, s8, 288 +; RV32-NEXT: vs1r.v v10, (s8) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi s8, sp, 24 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v16, v15, v16 +; RV32-NEXT: vlse64.v v10, (s7), zero +; RV32-NEXT: csrr s7, vlenb +; RV32-NEXT: slli s7, s7, 1 +; RV32-NEXT: add s7, sp, s7 +; RV32-NEXT: addi s7, s7, 288 +; RV32-NEXT: vs1r.v v10, (s7) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi s7, sp, 16 +; RV32-NEXT: vmul.vv v17, v8, v17 +; RV32-NEXT: vmul.vv v18, v8, v18 +; RV32-NEXT: vmul.vv v19, v8, v19 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vmul.vv v21, v8, v21 +; RV32-NEXT: vmul.vv v22, v8, v22 +; RV32-NEXT: vmul.vv v23, v8, v23 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vmul.vv v25, v8, v25 +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vmul.vv v27, v8, v27 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vmul.vv v29, v8, v29 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vmul.vv v31, v8, v31 +; RV32-NEXT: vmul.vv v7, v8, v7 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: vmul.vv v5, v8, v5 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vmul.vv v3, v8, v3 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vmul.vv v1, v8, v1 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v16, v16, v17 +; RV32-NEXT: addi s9, sp, 224 +; RV32-NEXT: vlse64.v v11, (s9), zero +; RV32-NEXT: 
vxor.vv v16, v16, v18 +; RV32-NEXT: vlse64.v v10, (s6), zero +; RV32-NEXT: csrr s6, vlenb +; RV32-NEXT: add s6, sp, s6 +; RV32-NEXT: addi s6, s6, 288 +; RV32-NEXT: vs1r.v v10, (s6) # vscale x 8-byte Folded Spill +; RV32-NEXT: vxor.vv v16, v16, v19 +; RV32-NEXT: vlse64.v v10, (s5), zero +; RV32-NEXT: addi s5, sp, 288 +; RV32-NEXT: vs1r.v v10, (s5) # vscale x 8-byte Folded Spill +; RV32-NEXT: vxor.vv v16, v16, v20 +; RV32-NEXT: vlse64.v v12, (s4), zero +; RV32-NEXT: vxor.vv v16, v16, v0 +; RV32-NEXT: vlse64.v v0, (s3), zero +; RV32-NEXT: vxor.vv v16, v16, v21 +; RV32-NEXT: vlse64.v v21, (s2), zero +; RV32-NEXT: vxor.vv v16, v16, v22 +; RV32-NEXT: vlse64.v v22, (s1), zero +; RV32-NEXT: vxor.vv v16, v16, v23 +; RV32-NEXT: vlse64.v v23, (s0), zero +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: vlse64.v v24, (t6), zero +; RV32-NEXT: vxor.vv v16, v16, v25 +; RV32-NEXT: vlse64.v v25, (t4), zero +; RV32-NEXT: vxor.vv v16, v16, v26 +; RV32-NEXT: vlse64.v v26, (t3), zero +; RV32-NEXT: vxor.vv v16, v16, v27 +; RV32-NEXT: vlse64.v v27, (t2), zero +; RV32-NEXT: vxor.vv v16, v16, v28 +; RV32-NEXT: vlse64.v v28, (t1), zero +; RV32-NEXT: vxor.vv v16, v16, v29 +; RV32-NEXT: vlse64.v v29, (t0), zero +; RV32-NEXT: vxor.vv v16, v16, v30 +; RV32-NEXT: vlse64.v v30, (a7), zero +; RV32-NEXT: vxor.vv v16, v16, v31 +; RV32-NEXT: vlse64.v v31, (a6), zero +; RV32-NEXT: vxor.vv v16, v16, v7 +; RV32-NEXT: vlse64.v v7, (a5), zero +; RV32-NEXT: vxor.vv v16, v16, v6 +; RV32-NEXT: vlse64.v v6, (a4), zero +; RV32-NEXT: vxor.vv v16, v16, v5 +; RV32-NEXT: vlse64.v v5, (a3), zero +; RV32-NEXT: vxor.vv v16, v16, v4 +; RV32-NEXT: vlse64.v v4, (a2), zero +; RV32-NEXT: vxor.vv v16, v16, v3 +; RV32-NEXT: vlse64.v v3, (a1), zero +; RV32-NEXT: vxor.vv v16, v16, v2 +; RV32-NEXT: vlse64.v v2, (s11), zero +; RV32-NEXT: vxor.vv v1, v16, v1 +; RV32-NEXT: vlse64.v v10, (ra), zero +; RV32-NEXT: vand.vv v13, v9, v13 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v14, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v14, v9, v14 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v15, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v15, v9, v15 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v16, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v16, v9, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v17, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v17, v9, v17 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v18, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v18, v9, v18 +; RV32-NEXT: vand.vv v19, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v20, v9, v11 +; RV32-NEXT: addi a1, sp, 288 +; RV32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v11, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, 
a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v11, v9, v12 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v0, v9, v0 +; RV32-NEXT: vand.vv v21, v9, v21 +; RV32-NEXT: vand.vv v22, v9, v22 +; RV32-NEXT: vand.vv v23, v9, v23 +; RV32-NEXT: vand.vv v24, v9, v24 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vand.vv v26, v9, v26 +; RV32-NEXT: vand.vv v27, v9, v27 +; RV32-NEXT: vand.vv v28, v9, v28 +; RV32-NEXT: vand.vv v29, v9, v29 +; RV32-NEXT: vand.vv v30, v9, v30 +; RV32-NEXT: vand.vv v31, v9, v31 +; RV32-NEXT: vand.vv v7, v9, v7 +; RV32-NEXT: vand.vv v6, v9, v6 +; RV32-NEXT: vand.vv v5, v9, v5 +; RV32-NEXT: vand.vv v4, v9, v4 +; RV32-NEXT: vand.vv v11, v9, v3 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v2, v9, v2 +; RV32-NEXT: vand.vv v10, v9, v10 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vlse64.v v10, (s10), zero +; RV32-NEXT: vlse64.v v3, (t5), zero +; RV32-NEXT: vlse64.v v11, (s8), zero +; RV32-NEXT: vlse64.v v12, (s7), zero +; RV32-NEXT: vand.vv v10, v9, v10 +; RV32-NEXT: vand.vv v3, v9, v3 +; RV32-NEXT: vand.vv v11, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v12, v9, v12 +; RV32-NEXT: vand.vx v9, v9, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: vxor.vv v9, v1, v9 +; RV32-NEXT: vmul.vv v11, v8, v13 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v14 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v15 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v16 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v17 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v18 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v19 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v20 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v0 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v21 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v22 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v23 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v24 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v25 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv 
v11, v8, v26 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v27 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v28 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v29 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v30 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v31 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v7 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v6 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v5 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v4 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v2 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: vmul.vv v10, v8, v3 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v9, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmul_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vand.vi v10, v9, 2 +; RV64-NEXT: vand.vi v11, v9, 1 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v11, v10 +; RV64-NEXT: vand.vi v11, v9, 4 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vi v11, v9, 8 +; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a0, 256 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 
+; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, 512 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a2 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 128 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 256 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 512 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 1024 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 2048 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 8192 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 16384 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 65536 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 131072 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 262144 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, 
v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 37 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 41 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 42 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 43 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 44 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 45 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 46 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 47 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 50 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 52 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 54 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 55 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 56 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 57 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 58 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 59 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 60 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; 
RV64-NEXT: slli a1, a0, 61 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vmul.vv v8, v8, v9 +; RV64-NEXT: vxor.vv v8, v10, v8 +; RV64-NEXT: ret + %a = call <1 x i64> @llvm.clmul.v1i64(<1 x i64> %x, <1 x i64> %y) + ret <1 x i64> %a +} + +define <2 x i64> @clmul_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { +; RV32-LABEL: clmul_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: li t5, 1 +; RV32-NEXT: li a4, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li s11, 8 +; RV32-NEXT: li a0, 16 +; RV32-NEXT: li ra, 32 +; RV32-NEXT: li s10, 64 +; RV32-NEXT: li s9, 128 +; RV32-NEXT: li s8, 256 +; RV32-NEXT: li s7, 512 +; RV32-NEXT: li s1, 1024 +; RV32-NEXT: lui s6, 1 +; RV32-NEXT: lui s5, 2 +; RV32-NEXT: lui s4, 4 +; RV32-NEXT: lui s3, 8 +; RV32-NEXT: lui s2, 16 +; RV32-NEXT: lui s0, 32 +; RV32-NEXT: lui t6, 64 +; RV32-NEXT: lui t4, 128 +; RV32-NEXT: lui t3, 256 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: lui t1, 1024 +; RV32-NEXT: lui t0, 2048 +; RV32-NEXT: lui a7, 4096 +; RV32-NEXT: lui a6, 8192 +; RV32-NEXT: lui a5, 16384 +; RV32-NEXT: lui a3, 32768 +; RV32-NEXT: sw a1, 272(sp) +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw zero, 264(sp) +; RV32-NEXT: sw t5, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a4, 260(sp) +; RV32-NEXT: lui a4, 65536 +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw a2, 252(sp) +; RV32-NEXT: lui a2, 131072 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s11, 244(sp) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vand.vi v13, v9, 2 +; RV32-NEXT: vand.vi v14, v9, 1 +; RV32-NEXT: vand.vi v12, v9, 4 +; RV32-NEXT: vand.vi v11, v9, 8 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw a0, 236(sp) +; RV32-NEXT: vand.vx v10, v9, a0 +; RV32-NEXT: addi s11, sp, 272 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw ra, 228(sp) +; RV32-NEXT: vand.vx v15, v9, ra +; RV32-NEXT: addi ra, sp, 264 +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw s10, 220(sp) +; RV32-NEXT: vand.vx v16, v9, s10 +; RV32-NEXT: addi s10, sp, 256 +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw s9, 212(sp) +; RV32-NEXT: vand.vx v17, v9, s9 +; RV32-NEXT: addi s9, sp, 248 +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw s8, 204(sp) +; RV32-NEXT: vand.vx v18, v9, s8 +; RV32-NEXT: addi s8, sp, 240 +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s7, 196(sp) +; RV32-NEXT: vand.vx v19, v9, s7 +; RV32-NEXT: addi s7, sp, 232 +; RV32-NEXT: sw zero, 
184(sp) +; RV32-NEXT: sw s1, 188(sp) +; RV32-NEXT: vand.vx v20, v9, s1 +; RV32-NEXT: slli t5, t5, 11 +; RV32-NEXT: vand.vx v21, v9, s6 +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw t5, 180(sp) +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw s6, 172(sp) +; RV32-NEXT: addi s6, sp, 216 +; RV32-NEXT: vand.vx v22, v9, s5 +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw s5, 164(sp) +; RV32-NEXT: addi s5, sp, 208 +; RV32-NEXT: vand.vx v23, v9, s4 +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw s4, 156(sp) +; RV32-NEXT: addi s4, sp, 200 +; RV32-NEXT: vand.vx v24, v9, s3 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw s3, 148(sp) +; RV32-NEXT: addi s3, sp, 192 +; RV32-NEXT: vand.vx v25, v9, s2 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw s2, 140(sp) +; RV32-NEXT: addi s2, sp, 184 +; RV32-NEXT: vand.vx v26, v9, s0 +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s0, 132(sp) +; RV32-NEXT: addi s1, sp, 176 +; RV32-NEXT: vand.vx v27, v9, t6 +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t6, 124(sp) +; RV32-NEXT: addi s0, sp, 168 +; RV32-NEXT: vand.vx v28, v9, t4 +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t4, 116(sp) +; RV32-NEXT: addi t6, sp, 160 +; RV32-NEXT: vand.vx v29, v9, t3 +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t3, 108(sp) +; RV32-NEXT: addi t4, sp, 152 +; RV32-NEXT: vand.vx v30, v9, t2 +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw t2, 100(sp) +; RV32-NEXT: addi t3, sp, 144 +; RV32-NEXT: vand.vx v31, v9, t1 +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw t1, 92(sp) +; RV32-NEXT: addi t2, sp, 136 +; RV32-NEXT: vand.vx v7, v9, t0 +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw t0, 84(sp) +; RV32-NEXT: addi t1, sp, 128 +; RV32-NEXT: vand.vx v6, v9, a7 +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw a7, 76(sp) +; RV32-NEXT: addi t0, sp, 120 +; RV32-NEXT: vand.vx v5, v9, a6 +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw a6, 68(sp) +; RV32-NEXT: addi a7, sp, 112 +; RV32-NEXT: vand.vx v4, v9, a5 +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw a5, 60(sp) +; RV32-NEXT: addi a6, sp, 104 +; RV32-NEXT: vand.vx v3, v9, a3 +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw a3, 52(sp) +; RV32-NEXT: addi a5, sp, 96 +; RV32-NEXT: vand.vx v2, v9, a4 +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw a4, 44(sp) +; RV32-NEXT: addi a4, sp, 88 +; RV32-NEXT: vand.vx v1, v9, a2 +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: addi a3, sp, 80 +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: lui a0, 262144 +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: addi a2, sp, 72 +; RV32-NEXT: vand.vx v0, v9, t5 +; RV32-NEXT: addi a1, sp, 64 +; RV32-NEXT: vmul.vv v13, v8, v13 +; RV32-NEXT: vmul.vv v14, v8, v14 +; RV32-NEXT: vxor.vi v14, v14, 0 +; RV32-NEXT: vxor.vv v14, v14, v13 +; RV32-NEXT: vlse64.v v13, (s11), zero +; RV32-NEXT: addi s11, sp, 56 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v14, v14, v12 +; RV32-NEXT: vlse64.v v12, (ra), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv ra, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, ra +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs1r.v v12, (t5) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi ra, sp, 48 +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v14, v14, v11 +; RV32-NEXT: vlse64.v v11, (s10), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli s10, t5, 2 +; RV32-NEXT: add t5, s10, t5 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs1r.v v11, (t5) # vscale x 8-byte Folded 
Spill +; RV32-NEXT: addi s10, sp, 40 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v14, v14, v10 +; RV32-NEXT: vlse64.v v10, (s9), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs1r.v v10, (t5) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi t5, sp, 32 +; RV32-NEXT: vmul.vv v15, v8, v15 +; RV32-NEXT: vxor.vv v15, v14, v15 +; RV32-NEXT: vlse64.v v10, (s8), zero +; RV32-NEXT: csrr s8, vlenb +; RV32-NEXT: slli s9, s8, 1 +; RV32-NEXT: add s8, s9, s8 +; RV32-NEXT: add s8, sp, s8 +; RV32-NEXT: addi s8, s8, 288 +; RV32-NEXT: vs1r.v v10, (s8) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi s8, sp, 24 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v16, v15, v16 +; RV32-NEXT: vlse64.v v10, (s7), zero +; RV32-NEXT: csrr s7, vlenb +; RV32-NEXT: slli s7, s7, 1 +; RV32-NEXT: add s7, sp, s7 +; RV32-NEXT: addi s7, s7, 288 +; RV32-NEXT: vs1r.v v10, (s7) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi s7, sp, 16 +; RV32-NEXT: vmul.vv v17, v8, v17 +; RV32-NEXT: vmul.vv v18, v8, v18 +; RV32-NEXT: vmul.vv v19, v8, v19 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vmul.vv v21, v8, v21 +; RV32-NEXT: vmul.vv v22, v8, v22 +; RV32-NEXT: vmul.vv v23, v8, v23 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vmul.vv v25, v8, v25 +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vmul.vv v27, v8, v27 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vmul.vv v29, v8, v29 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vmul.vv v31, v8, v31 +; RV32-NEXT: vmul.vv v7, v8, v7 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: vmul.vv v5, v8, v5 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vmul.vv v3, v8, v3 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vmul.vv v1, v8, v1 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v16, v16, v17 +; RV32-NEXT: addi s9, sp, 224 +; RV32-NEXT: vlse64.v v11, (s9), zero +; RV32-NEXT: vxor.vv v16, v16, v18 +; RV32-NEXT: vlse64.v v10, (s6), zero +; RV32-NEXT: csrr s6, vlenb +; RV32-NEXT: add s6, sp, s6 +; RV32-NEXT: addi s6, s6, 288 +; RV32-NEXT: vs1r.v v10, (s6) # vscale x 8-byte Folded Spill +; RV32-NEXT: vxor.vv v16, v16, v19 +; RV32-NEXT: vlse64.v v10, (s5), zero +; RV32-NEXT: addi s5, sp, 288 +; RV32-NEXT: vs1r.v v10, (s5) # vscale x 8-byte Folded Spill +; RV32-NEXT: vxor.vv v16, v16, v20 +; RV32-NEXT: vlse64.v v12, (s4), zero +; RV32-NEXT: vxor.vv v16, v16, v0 +; RV32-NEXT: vlse64.v v0, (s3), zero +; RV32-NEXT: vxor.vv v16, v16, v21 +; RV32-NEXT: vlse64.v v21, (s2), zero +; RV32-NEXT: vxor.vv v16, v16, v22 +; RV32-NEXT: vlse64.v v22, (s1), zero +; RV32-NEXT: vxor.vv v16, v16, v23 +; RV32-NEXT: vlse64.v v23, (s0), zero +; RV32-NEXT: vxor.vv v16, v16, v24 +; RV32-NEXT: vlse64.v v24, (t6), zero +; RV32-NEXT: vxor.vv v16, v16, v25 +; RV32-NEXT: vlse64.v v25, (t4), zero +; RV32-NEXT: vxor.vv v16, v16, v26 +; RV32-NEXT: vlse64.v v26, (t3), zero +; RV32-NEXT: vxor.vv v16, v16, v27 +; RV32-NEXT: vlse64.v v27, (t2), zero +; RV32-NEXT: vxor.vv v16, v16, v28 +; RV32-NEXT: vlse64.v v28, (t1), zero +; RV32-NEXT: vxor.vv v16, v16, v29 +; RV32-NEXT: vlse64.v v29, (t0), zero +; RV32-NEXT: vxor.vv v16, v16, v30 +; RV32-NEXT: vlse64.v v30, (a7), zero +; RV32-NEXT: vxor.vv v16, v16, v31 +; RV32-NEXT: vlse64.v v31, (a6), zero +; RV32-NEXT: vxor.vv v16, v16, v7 +; RV32-NEXT: vlse64.v v7, (a5), zero +; RV32-NEXT: vxor.vv v16, v16, v6 +; RV32-NEXT: vlse64.v v6, (a4), zero +; RV32-NEXT: vxor.vv v16, v16, v5 +; RV32-NEXT: vlse64.v v5, (a3), zero +; RV32-NEXT: vxor.vv v16, v16, v4 +; 
RV32-NEXT: vlse64.v v4, (a2), zero +; RV32-NEXT: vxor.vv v16, v16, v3 +; RV32-NEXT: vlse64.v v3, (a1), zero +; RV32-NEXT: vxor.vv v16, v16, v2 +; RV32-NEXT: vlse64.v v2, (s11), zero +; RV32-NEXT: vxor.vv v1, v16, v1 +; RV32-NEXT: vlse64.v v10, (ra), zero +; RV32-NEXT: vand.vv v13, v9, v13 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v14, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v14, v9, v14 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v15, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v15, v9, v15 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v16, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v16, v9, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v17, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v17, v9, v17 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v18, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v18, v9, v18 +; RV32-NEXT: vand.vv v19, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v20, v9, v11 +; RV32-NEXT: addi a1, sp, 288 +; RV32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v11, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v11, v9, v12 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v0, v9, v0 +; RV32-NEXT: vand.vv v21, v9, v21 +; RV32-NEXT: vand.vv v22, v9, v22 +; RV32-NEXT: vand.vv v23, v9, v23 +; RV32-NEXT: vand.vv v24, v9, v24 +; RV32-NEXT: vand.vv v25, v9, v25 +; RV32-NEXT: vand.vv v26, v9, v26 +; RV32-NEXT: vand.vv v27, v9, v27 +; RV32-NEXT: vand.vv v28, v9, v28 +; RV32-NEXT: vand.vv v29, v9, v29 +; RV32-NEXT: vand.vv v30, v9, v30 +; RV32-NEXT: vand.vv v31, v9, v31 +; RV32-NEXT: vand.vv v7, v9, v7 +; RV32-NEXT: vand.vv v6, v9, v6 +; RV32-NEXT: vand.vv v5, v9, v5 +; RV32-NEXT: vand.vv v4, v9, v4 +; RV32-NEXT: vand.vv v11, v9, v3 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v2, v9, v2 +; RV32-NEXT: vand.vv v10, v9, v10 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vlse64.v v10, (s10), zero +; RV32-NEXT: vlse64.v v3, (t5), zero +; RV32-NEXT: vlse64.v v11, (s8), zero +; RV32-NEXT: vlse64.v v12, (s7), zero +; RV32-NEXT: 
vand.vv v10, v9, v10 +; RV32-NEXT: vand.vv v3, v9, v3 +; RV32-NEXT: vand.vv v11, v9, v11 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v12, v9, v12 +; RV32-NEXT: vand.vx v9, v9, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: vxor.vv v9, v1, v9 +; RV32-NEXT: vmul.vv v11, v8, v13 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v14 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v15 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v16 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v17 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v18 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v19 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v20 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v0 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v21 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v22 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v23 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v24 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v25 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v26 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v27 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v28 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v29 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v30 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v31 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v7 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v6 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v5 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v4 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v11, v8, v2 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v11, v8, v11 +; RV32-NEXT: vxor.vv v9, v9, v11 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: vmul.vv v10, v8, v3 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; 
RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v9, v9, v10 +; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v9, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmul_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vand.vi v10, v9, 2 +; RV64-NEXT: vand.vi v11, v9, 1 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v11, v10 +; RV64-NEXT: vand.vi v11, v9, 4 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vi v11, v9, 8 +; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a0, 256 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, 512 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a2 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 128 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; 
RV64-NEXT: lui a1, 256 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 512 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 1024 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 2048 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 8192 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 16384 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 65536 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 131072 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: lui a1, 262144 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 37 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 41 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 42 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 43 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 44 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 45 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 46 +; RV64-NEXT: vmul.vv v11, v8, v11 +; 
RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 47 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 50 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 52 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 54 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 55 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 56 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 57 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 58 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 59 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 60 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: slli a1, a0, 61 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a1 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vand.vx v11, v9, a0 +; RV64-NEXT: vand.vx v9, v9, a1 +; RV64-NEXT: vmul.vv v11, v8, v11 +; RV64-NEXT: vxor.vv v10, v10, v11 +; RV64-NEXT: vmul.vv v8, v8, v9 +; RV64-NEXT: vxor.vv v8, v10, v8 +; RV64-NEXT: ret + %a = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %a +} + +define <4 x i64> @clmul_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { +; RV32-LABEL: clmul_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; 
RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: li s2, 1 +; RV32-NEXT: li a3, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li s7, 8 +; RV32-NEXT: li a0, 16 +; RV32-NEXT: li s6, 32 +; RV32-NEXT: li s5, 64 +; RV32-NEXT: li s4, 128 +; RV32-NEXT: li s1, 256 +; RV32-NEXT: li s0, 512 +; RV32-NEXT: li t5, 1024 +; RV32-NEXT: lui ra, 1 +; RV32-NEXT: lui s8, 2 +; RV32-NEXT: lui s10, 4 +; RV32-NEXT: lui s11, 8 +; RV32-NEXT: lui s9, 16 +; RV32-NEXT: lui s3, 32 +; RV32-NEXT: lui t6, 64 +; RV32-NEXT: lui t4, 128 +; RV32-NEXT: lui t3, 256 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: lui t1, 1024 +; RV32-NEXT: lui t0, 2048 +; RV32-NEXT: lui a7, 4096 +; RV32-NEXT: lui a6, 8192 +; RV32-NEXT: lui a5, 16384 +; RV32-NEXT: lui a4, 32768 +; RV32-NEXT: sw a1, 272(sp) +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw zero, 264(sp) +; RV32-NEXT: sw s2, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a3, 260(sp) +; RV32-NEXT: lui a3, 65536 +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw a2, 252(sp) +; RV32-NEXT: lui a2, 131072 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s7, 244(sp) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vand.vi v28, v10, 2 +; RV32-NEXT: vand.vi v20, v10, 1 +; RV32-NEXT: vand.vi v30, v10, 4 +; RV32-NEXT: vand.vi v14, v10, 8 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw a0, 236(sp) +; RV32-NEXT: vand.vx v12, v10, a0 +; RV32-NEXT: addi s7, sp, 272 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw s6, 228(sp) +; RV32-NEXT: vand.vx v16, v10, s6 +; RV32-NEXT: addi s6, sp, 264 +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw s5, 220(sp) +; RV32-NEXT: vand.vx v18, v10, s5 +; RV32-NEXT: addi s5, sp, 256 +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw s4, 212(sp) +; RV32-NEXT: vand.vx v0, v10, s4 +; RV32-NEXT: addi s4, sp, 248 +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw s1, 204(sp) +; RV32-NEXT: vand.vx v6, v10, s1 +; RV32-NEXT: addi s1, sp, 240 +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s0, 196(sp) +; RV32-NEXT: vand.vx v4, v10, s0 +; RV32-NEXT: addi s0, sp, 232 +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw t5, 188(sp) +; RV32-NEXT: vand.vx v2, v10, t5 +; RV32-NEXT: slli s2, s2, 11 +; RV32-NEXT: vand.vx v24, v10, ra +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw s2, 180(sp) +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw ra, 172(sp) +; RV32-NEXT: addi t5, sp, 216 +; RV32-NEXT: vand.vx v26, v10, s8 +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw s8, 164(sp) +; RV32-NEXT: addi s8, sp, 208 +; RV32-NEXT: vand.vx v22, v10, s10 +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw s10, 156(sp) +; RV32-NEXT: addi s10, sp, 200 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vi v20, v20, 0 +; RV32-NEXT: vxor.vv v20, v20, v28 +; RV32-NEXT: vand.vx v28, v10, s11 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw s11, 148(sp) +; RV32-NEXT: addi s11, sp, 192 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vxor.vv v20, v20, v30 +; RV32-NEXT: vand.vx v30, v10, s9 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw s9, 140(sp) +; RV32-NEXT: addi s9, sp, 184 +; RV32-NEXT: vmul.vv v14, v8, v14 +; RV32-NEXT: vxor.vv v14, v20, v14 +; RV32-NEXT: vand.vx v20, v10, s3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv ra, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, ra +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v20, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s3, 132(sp) +; RV32-NEXT: addi s3, sp, 176 +; 
RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v12, v14, v12 +; RV32-NEXT: vand.vx v14, v10, t6 +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t6, 124(sp) +; RV32-NEXT: addi t6, sp, 168 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v10, t4 +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t4, 116(sp) +; RV32-NEXT: addi t4, sp, 160 +; RV32-NEXT: vmul.vv v18, v8, v18 +; RV32-NEXT: vxor.vv v18, v12, v18 +; RV32-NEXT: vand.vx v12, v10, t3 +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t3, 108(sp) +; RV32-NEXT: addi t3, sp, 152 +; RV32-NEXT: vmul.vv v20, v8, v0 +; RV32-NEXT: vxor.vv v18, v18, v20 +; RV32-NEXT: vand.vx v20, v10, t2 +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw t2, 100(sp) +; RV32-NEXT: addi t2, sp, 144 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: vxor.vv v18, v18, v6 +; RV32-NEXT: vand.vx v6, v10, t1 +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw t1, 92(sp) +; RV32-NEXT: addi t1, sp, 136 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v18, v18, v4 +; RV32-NEXT: vand.vx v4, v10, t0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv ra, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add ra, ra, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, ra +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw t0, 84(sp) +; RV32-NEXT: addi t0, sp, 128 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vxor.vv v18, v18, v2 +; RV32-NEXT: vand.vx v2, v10, s2 +; RV32-NEXT: addi ra, sp, 120 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vxor.vv v18, v18, v2 +; RV32-NEXT: vand.vx v2, v10, a7 +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw a7, 76(sp) +; RV32-NEXT: addi a7, sp, 112 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v18, v18, v24 +; RV32-NEXT: vand.vx v4, v10, a6 +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw a6, 68(sp) +; RV32-NEXT: addi a6, sp, 104 +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vxor.vv v18, v18, v26 +; RV32-NEXT: vand.vx v26, v10, a5 +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw a5, 60(sp) +; RV32-NEXT: addi a5, sp, 96 +; RV32-NEXT: vmul.vv v22, v8, v22 +; RV32-NEXT: vxor.vv v18, v18, v22 +; RV32-NEXT: vand.vx v24, v10, a4 +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw a4, 52(sp) +; RV32-NEXT: addi a4, sp, 88 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v18, v18, v28 +; RV32-NEXT: vand.vx v28, v10, a3 +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw a3, 44(sp) +; RV32-NEXT: addi a3, sp, 80 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vxor.vv v18, v18, v30 +; RV32-NEXT: vand.vx v30, v10, a2 +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: addi a2, sp, 72 +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: lui a0, 262144 +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: addi a1, sp, 64 +; RV32-NEXT: sw a6, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv s2, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, s2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vl2r.v v22, (a6) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v22 +; RV32-NEXT: vxor.vv v0, v18, v0 +; RV32-NEXT: vlse64.v v18, (s7), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv s2, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, s2 +; RV32-NEXT: add a6, sp, a6 +; 
RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s7, sp, 56 +; RV32-NEXT: vmul.vv v14, v8, v14 +; RV32-NEXT: vxor.vv v14, v0, v14 +; RV32-NEXT: vlse64.v v18, (s6), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv s2, a6 +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: add a6, a6, s2 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s2, sp, 48 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v14, v14, v16 +; RV32-NEXT: vlse64.v v16, (s5), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: mv s5, a6 +; RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: add a6, a6, s5 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v16, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s5, sp, 40 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v12, v14, v12 +; RV32-NEXT: vlse64.v v14, (s4), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 5 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v14, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s4, sp, 32 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v20, v12, v20 +; RV32-NEXT: vlse64.v v12, (s1), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: mv s1, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add s1, s1, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add s1, s1, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, s1 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s1, sp, 24 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: vxor.vv v20, v20, v6 +; RV32-NEXT: vlse64.v v12, (s0), zero +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv s0, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add s0, s0, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, s0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 288 +; RV32-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi s0, sp, 16 +; RV32-NEXT: csrr s6, vlenb +; RV32-NEXT: slli s6, s6, 1 +; RV32-NEXT: mv a6, s6 +; RV32-NEXT: slli s6, s6, 1 +; RV32-NEXT: add a6, a6, s6 +; RV32-NEXT: slli s6, s6, 3 +; RV32-NEXT: add s6, s6, a6 +; RV32-NEXT: lw a6, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add s6, sp, s6 +; RV32-NEXT: addi s6, s6, 288 +; RV32-NEXT: vl2r.v v12, (s6) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v6, v8, v12 +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: vxor.vv v20, v20, v6 +; RV32-NEXT: addi s6, sp, 224 +; RV32-NEXT: vlse64.v v0, (s6), zero +; RV32-NEXT: vxor.vv v20, v20, v2 +; RV32-NEXT: vlse64.v v6, (t5), zero +; RV32-NEXT: vxor.vv v20, v20, v4 +; RV32-NEXT: vlse64.v v22, (s8), zero +; RV32-NEXT: vxor.vv v20, v20, v26 +; RV32-NEXT: vlse64.v v18, (s10), zero +; RV32-NEXT: vxor.vv v20, v20, v24 +; RV32-NEXT: vlse64.v v16, (s11), zero +; RV32-NEXT: vxor.vv v20, v20, v28 +; RV32-NEXT: vlse64.v v14, (s9), zero +; RV32-NEXT: vxor.vv v2, v20, v30 +; RV32-NEXT: vlse64.v v12, (s3), zero +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 3 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, 
t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v26, v10, v20 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 3 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v4, v10, v20 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 4 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v30, v10, v20 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 5 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v20, v10, v20 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v28, v10, v24 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v24, v10, v24 +; RV32-NEXT: vand.vv v0, v10, v0 +; RV32-NEXT: vand.vv v6, v10, v6 +; RV32-NEXT: vand.vv v22, v10, v22 +; RV32-NEXT: vand.vv v18, v10, v18 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v16, v10, v16 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs2r.v v16, (t5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v14, v10, v14 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add s3, s3, t5 +; RV32-NEXT: slli t5, t5, 1 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs2r.v v14, (t5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: csrr t5, vlenb +; RV32-NEXT: slli t5, t5, 3 +; RV32-NEXT: mv s3, t5 +; RV32-NEXT: slli t5, t5, 2 +; RV32-NEXT: add t5, t5, s3 +; RV32-NEXT: add t5, sp, t5 +; RV32-NEXT: addi t5, t5, 288 +; RV32-NEXT: vs2r.v v12, (t5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v12, (t6), zero +; RV32-NEXT: vlse64.v v14, (t4), zero +; RV32-NEXT: vlse64.v v16, (t3), zero +; RV32-NEXT: vlse64.v v18, (t2), zero +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v14 +; 
RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v16 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 2 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t3, t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v18 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: mv t3, t2 +; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: add t3, t3, t2 +; RV32-NEXT: slli t2, t2, 3 +; RV32-NEXT: add t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 288 +; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v12, (t1), zero +; RV32-NEXT: vlse64.v v14, (t0), zero +; RV32-NEXT: vlse64.v v16, (ra), zero +; RV32-NEXT: vlse64.v v18, (a7), zero +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v14 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 4 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v16 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: add t0, t0, a7 +; RV32-NEXT: slli a7, a7, 1 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v18 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 2 +; RV32-NEXT: mv t0, a7 +; RV32-NEXT: slli a7, a7, 3 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 288 +; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v12, (a6), zero +; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v16, (a4), zero +; RV32-NEXT: vlse64.v v18, (a3), zero +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 288 +; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v14 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: mv a4, a3 +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a4, a4, a3 +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 288 +; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: mv a4, a3 +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 288 +; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v18 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: mv a4, a3 +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 288 +; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill +; 
RV32-NEXT: vlse64.v v12, (a2), zero +; RV32-NEXT: vlse64.v v14, (a1), zero +; RV32-NEXT: vlse64.v v16, (s7), zero +; RV32-NEXT: vlse64.v v18, (s2), zero +; RV32-NEXT: vand.vv v12, v10, v12 +; RV32-NEXT: addi a1, sp, 288 +; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v14 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a2, a2, a1 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v12, v10, v18 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vlse64.v v14, (s5), zero +; RV32-NEXT: vlse64.v v16, (s4), zero +; RV32-NEXT: vlse64.v v18, (s1), zero +; RV32-NEXT: vlse64.v v12, (s0), zero +; RV32-NEXT: vand.vv v14, v10, v14 +; RV32-NEXT: vand.vv v16, v10, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 288 +; RV32-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v18, v10, v18 +; RV32-NEXT: vand.vv v16, v10, v12 +; RV32-NEXT: vand.vx v10, v10, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vxor.vv v10, v2, v10 +; RV32-NEXT: vmul.vv v12, v8, v26 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v4 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v30 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v20 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v28 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v24 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v0 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v6 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v22 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 
3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; 
RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v14 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v12, v8, v18 +; RV32-NEXT: vxor.vv v10, v10, v12 +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v10, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmul_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vand.vi v12, v10, 2 +; RV64-NEXT: vand.vi v14, v10, 1 +; 
RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v14, v12 +; RV64-NEXT: vand.vi v14, v10, 4 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vi v14, v10, 8 +; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a0 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a0 +; RV64-NEXT: li a0, 256 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: li a1, 512 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a2 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 128 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 256 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 512 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 1024 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 2048 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 8192 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 16384 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 65536 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, 
v10, a1 +; RV64-NEXT: lui a1, 131072 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: lui a1, 262144 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 37 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 41 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 42 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 43 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 44 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 45 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 46 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 47 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 50 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 52 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 54 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; 
RV64-NEXT: slli a1, a0, 55 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 56 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 57 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 58 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 59 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 60 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: slli a1, a0, 61 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a1 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: slli a1, a1, 63 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vand.vx v14, v10, a0 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vmul.vv v14, v8, v14 +; RV64-NEXT: vxor.vv v12, v12, v14 +; RV64-NEXT: vmul.vv v8, v8, v10 +; RV64-NEXT: vxor.vv v8, v12, v8 +; RV64-NEXT: ret + %a = call <4 x i64> @llvm.clmul.v4i64(<4 x i64> %x, <4 x i64> %y) + ret <4 x i64> %a +} + +define <8 x i64> @clmul_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { +; RV32-LABEL: clmul_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui a1, 524288 +; RV32-NEXT: li s4, 1 +; RV32-NEXT: li a3, 2 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li a0, 8 +; RV32-NEXT: li s3, 16 +; RV32-NEXT: li s2, 32 +; RV32-NEXT: li s5, 64 +; RV32-NEXT: li s6, 128 +; RV32-NEXT: li s8, 256 +; RV32-NEXT: li s1, 512 +; RV32-NEXT: li s7, 1024 +; RV32-NEXT: lui ra, 1 +; RV32-NEXT: lui s11, 2 +; RV32-NEXT: lui s10, 4 +; RV32-NEXT: lui s9, 8 +; RV32-NEXT: lui s0, 16 +; RV32-NEXT: lui t6, 32 +; RV32-NEXT: lui t5, 64 +; RV32-NEXT: lui t4, 128 +; RV32-NEXT: lui t3, 256 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: lui t1, 1024 +; RV32-NEXT: lui t0, 2048 +; RV32-NEXT: lui a7, 4096 +; RV32-NEXT: lui a6, 8192 +; RV32-NEXT: lui a5, 16384 +; RV32-NEXT: lui a4, 32768 +; RV32-NEXT: sw a1, 272(sp) +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw zero, 264(sp) +; RV32-NEXT: sw s4, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a3, 260(sp) +; RV32-NEXT: lui a3, 65536 +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw a2, 252(sp) +; RV32-NEXT: lui a2, 131072 +; RV32-NEXT: sw zero, 
240(sp) +; RV32-NEXT: sw a0, 244(sp) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vand.vi v28, v12, 2 +; RV32-NEXT: vand.vi v4, v12, 1 +; RV32-NEXT: vand.vi v24, v12, 4 +; RV32-NEXT: vand.vi v20, v12, 8 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw s3, 236(sp) +; RV32-NEXT: vand.vx v16, v12, s3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi s3, sp, 272 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw s2, 228(sp) +; RV32-NEXT: vand.vx v0, v12, s2 +; RV32-NEXT: addi s2, sp, 264 +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw s5, 220(sp) +; RV32-NEXT: vmul.vv v16, v8, v28 +; RV32-NEXT: vmul.vv v28, v8, v4 +; RV32-NEXT: vxor.vi v28, v28, 0 +; RV32-NEXT: vxor.vv v28, v28, v16 +; RV32-NEXT: vand.vx v16, v12, s5 +; RV32-NEXT: addi s5, sp, 256 +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw s6, 212(sp) +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v28, v28, v24 +; RV32-NEXT: vand.vx v24, v12, s6 +; RV32-NEXT: addi s6, sp, 248 +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw s8, 204(sp) +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v20, v28, v20 +; RV32-NEXT: vand.vx v28, v12, s8 +; RV32-NEXT: addi s8, sp, 240 +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s1, 196(sp) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v20, v20, v4 +; RV32-NEXT: vand.vx v4, v12, s1 +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw s7, 188(sp) +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v20, v20, v0 +; RV32-NEXT: vand.vx v0, v12, s7 +; RV32-NEXT: slli a0, s4, 11 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v20, v20, v16 +; RV32-NEXT: vand.vx v16, v12, ra +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a0, 180(sp) +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw ra, 172(sp) +; RV32-NEXT: addi s4, sp, 216 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v24, v20, v24 +; RV32-NEXT: vand.vx v20, v12, s11 +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw s11, 164(sp) +; RV32-NEXT: addi s11, sp, 208 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v28, v24, v28 +; RV32-NEXT: vand.vx v24, v12, s10 +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw s10, 156(sp) +; RV32-NEXT: addi s10, sp, 200 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v4, v28, v4 +; RV32-NEXT: vand.vx v28, v12, s9 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw s9, 148(sp) +; RV32-NEXT: addi s9, sp, 192 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v4, v4, v0 +; RV32-NEXT: vand.vx v0, v12, a0 +; RV32-NEXT: addi ra, sp, 184 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v0, v4, v0 +; RV32-NEXT: vand.vx v4, v12, s0 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw s0, 140(sp) +; RV32-NEXT: addi s1, sp, 176 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v0, v0, v16 +; RV32-NEXT: vand.vx v16, v12, t6 +; RV32-NEXT: sw zero, 128(sp) +; 
RV32-NEXT: sw t6, 132(sp) +; RV32-NEXT: addi s0, sp, 168 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v0, v0, v20 +; RV32-NEXT: vand.vx v20, v12, t5 +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t5, 124(sp) +; RV32-NEXT: addi t6, sp, 160 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: vand.vx v24, v12, t4 +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t4, 116(sp) +; RV32-NEXT: addi t5, sp, 152 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v0, v0, v28 +; RV32-NEXT: vand.vx v28, v12, t3 +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t3, 108(sp) +; RV32-NEXT: addi t4, sp, 144 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: vand.vx v4, v12, t2 +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw t2, 100(sp) +; RV32-NEXT: addi t3, sp, 136 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v16, v0, v16 +; RV32-NEXT: vand.vx v0, v12, t1 +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw t1, 92(sp) +; RV32-NEXT: addi t2, sp, 128 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v20, v16, v20 +; RV32-NEXT: vand.vx v16, v12, t0 +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw t0, 84(sp) +; RV32-NEXT: addi t1, sp, 120 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v24, v20, v24 +; RV32-NEXT: vand.vx v20, v12, a7 +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw a7, 76(sp) +; RV32-NEXT: addi t0, sp, 112 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v24, v24, v28 +; RV32-NEXT: vand.vx v28, v12, a6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw a6, 68(sp) +; RV32-NEXT: addi a7, sp, 104 +; RV32-NEXT: vmul.vv v28, v8, v4 +; RV32-NEXT: vxor.vv v24, v24, v28 +; RV32-NEXT: vand.vx v28, v12, a5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw a5, 60(sp) +; RV32-NEXT: addi a6, sp, 96 +; RV32-NEXT: vmul.vv v28, v8, v0 +; RV32-NEXT: vxor.vv v28, v24, v28 +; RV32-NEXT: vand.vx v24, v12, a4 +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw a4, 52(sp) +; RV32-NEXT: addi a5, sp, 88 +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v16, v28, v16 +; RV32-NEXT: vand.vx v28, v12, a3 +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw a3, 44(sp) +; RV32-NEXT: addi a4, sp, 80 +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v16, v16, v20 +; RV32-NEXT: vand.vx v4, v12, a2 +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: addi a3, sp, 72 +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: lui a1, 262144 +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: addi a2, sp, 64 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; 
RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: vxor.vv v20, v16, v20 +; RV32-NEXT: vlse64.v v16, (s3), zero +; RV32-NEXT: addi s3, sp, 56 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vxor.vv v0, v20, v0 +; RV32-NEXT: vlse64.v v20, (s2), zero +; RV32-NEXT: addi s2, sp, 48 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: vxor.vv v0, v0, v24 +; RV32-NEXT: vlse64.v v24, (s5), zero +; RV32-NEXT: addi s5, sp, 40 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: vxor.vv v0, v0, v28 +; RV32-NEXT: vlse64.v v28, (s6), zero +; RV32-NEXT: addi s6, sp, 32 +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: vxor.vv v4, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v4, (s8), zero +; RV32-NEXT: addi s8, sp, 24 +; RV32-NEXT: vand.vv v16, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s7, s7, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi s7, sp, 232 +; RV32-NEXT: vlse64.v v16, (s7), zero +; RV32-NEXT: addi s7, sp, 224 +; RV32-NEXT: vlse64.v v20, (s7), zero +; RV32-NEXT: vlse64.v v24, (s4), zero +; RV32-NEXT: vlse64.v v28, (s11), zero +; RV32-NEXT: 
vand.vv v16, v12, v16 +; RV32-NEXT: csrr s4, vlenb +; RV32-NEXT: slli s4, s4, 4 +; RV32-NEXT: add s4, sp, s4 +; RV32-NEXT: addi s4, s4, 288 +; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr s4, vlenb +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: mv s7, s4 +; RV32-NEXT: slli s4, s4, 1 +; RV32-NEXT: add s7, s7, s4 +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: add s4, s4, s7 +; RV32-NEXT: add s4, sp, s4 +; RV32-NEXT: addi s4, s4, 288 +; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr s4, vlenb +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: mv s7, s4 +; RV32-NEXT: slli s4, s4, 4 +; RV32-NEXT: add s4, s4, s7 +; RV32-NEXT: add s4, sp, s4 +; RV32-NEXT: addi s4, s4, 288 +; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr s4, vlenb +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: mv s7, s4 +; RV32-NEXT: slli s4, s4, 1 +; RV32-NEXT: add s7, s7, s4 +; RV32-NEXT: slli s4, s4, 1 +; RV32-NEXT: add s7, s7, s4 +; RV32-NEXT: slli s4, s4, 2 +; RV32-NEXT: add s4, s4, s7 +; RV32-NEXT: add s4, sp, s4 +; RV32-NEXT: addi s4, s4, 288 +; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v20, (s10), zero +; RV32-NEXT: vlse64.v v24, (s9), zero +; RV32-NEXT: vlse64.v v28, (ra), zero +; RV32-NEXT: vlse64.v v4, (s1), zero +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 2 +; RV32-NEXT: mv s4, s1 +; RV32-NEXT: slli s1, s1, 1 +; RV32-NEXT: add s1, s1, s4 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 3 +; RV32-NEXT: mv s4, s1 +; RV32-NEXT: slli s1, s1, 2 +; RV32-NEXT: add s1, s1, s4 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 6 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v4 +; RV32-NEXT: csrr s1, vlenb +; RV32-NEXT: slli s1, s1, 3 +; RV32-NEXT: mv s4, s1 +; RV32-NEXT: slli s1, s1, 1 +; RV32-NEXT: add s4, s4, s1 +; RV32-NEXT: slli s1, s1, 2 +; RV32-NEXT: add s1, s1, s4 +; RV32-NEXT: add s1, sp, s1 +; RV32-NEXT: addi s1, s1, 288 +; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v24, (s0), zero +; RV32-NEXT: vlse64.v v28, (t6), zero +; RV32-NEXT: vlse64.v v4, (t5), zero +; RV32-NEXT: vlse64.v v0, (t4), zero +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 3 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: mv t5, t4 +; RV32-NEXT: slli t4, t4, 3 +; RV32-NEXT: add t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v4 +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: mv t5, t4 +; RV32-NEXT: slli t4, t4, 1 +; RV32-NEXT: add t5, t5, t4 +; RV32-NEXT: slli t4, t4, 1 +; RV32-NEXT: add t5, t5, t4 +; RV32-NEXT: slli t4, t4, 1 +; RV32-NEXT: add t4, 
t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v0 +; RV32-NEXT: csrr t4, vlenb +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: mv t5, t4 +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: add t5, t5, t4 +; RV32-NEXT: slli t4, t4, 2 +; RV32-NEXT: add t4, t4, t5 +; RV32-NEXT: add t4, sp, t4 +; RV32-NEXT: addi t4, t4, 288 +; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v28, (t3), zero +; RV32-NEXT: vlse64.v v4, (t2), zero +; RV32-NEXT: vlse64.v v0, (t1), zero +; RV32-NEXT: vlse64.v v16, (t0), zero +; RV32-NEXT: vand.vv v20, v12, v28 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 2 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v20, v12, v4 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 5 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v20, v12, v0 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 3 +; RV32-NEXT: mv t1, t0 +; RV32-NEXT: slli t0, t0, 1 +; RV32-NEXT: add t1, t1, t0 +; RV32-NEXT: slli t0, t0, 1 +; RV32-NEXT: add t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v16 +; RV32-NEXT: csrr t0, vlenb +; RV32-NEXT: slli t0, t0, 4 +; RV32-NEXT: mv t1, t0 +; RV32-NEXT: slli t0, t0, 2 +; RV32-NEXT: add t0, t0, t1 +; RV32-NEXT: add t0, sp, t0 +; RV32-NEXT: addi t0, t0, 288 +; RV32-NEXT: vs4r.v v16, (t0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v16, (a7), zero +; RV32-NEXT: vlse64.v v0, (a6), zero +; RV32-NEXT: vlse64.v v20, (a5), zero +; RV32-NEXT: vlse64.v v24, (a4), zero +; RV32-NEXT: vand.vv v4, v12, v16 +; RV32-NEXT: vand.vv v16, v12, v0 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: mv a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a5, a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 288 +; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: mv a5, a4 +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: add a5, a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 288 +; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: mv a5, a4 +; RV32-NEXT: slli a4, a4, 1 +; RV32-NEXT: add a5, a5, a4 +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 288 +; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vlse64.v v20, (a2), zero +; RV32-NEXT: vlse64.v v24, (s3), zero +; RV32-NEXT: vlse64.v v28, (s2), zero +; RV32-NEXT: vand.vv v0, v12, v16 +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 288 +; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v24 +; RV32-NEXT: csrr a2, vlenb +; 
RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 288 +; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v28 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: mv a3, a2 +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 288 +; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill +; RV32-NEXT: vlse64.v v16, (s5), zero +; RV32-NEXT: vlse64.v v20, (s6), zero +; RV32-NEXT: vlse64.v v24, (s8), zero +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vand.vv v16, v12, v16 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v16, v12, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v24, v12, v24 +; RV32-NEXT: vand.vv v20, v12, v28 +; RV32-NEXT: vand.vx v12, v12, a1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v12, v16, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte 
Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, 
a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vmul.vv v16, v8, v4 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vmul.vv v16, v8, v0 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, 
v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v16, v8, v16 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vmul.vv v16, v8, v24 +; RV32-NEXT: vxor.vv v12, v12, v16 +; RV32-NEXT: vmul.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v12, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmul_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vand.vi v16, v12, 2 +; RV64-NEXT: vand.vi v20, v12, 1 +; RV64-NEXT: vmul.vv v16, v8, v16 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v20, v16 +; RV64-NEXT: vand.vi v20, v12, 4 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vi v20, v12, 8 +; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a0 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a0 +; RV64-NEXT: li a0, 256 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: li a1, 512 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a0 +; RV64-NEXT: li a2, 1024 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; 
RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a2 +; RV64-NEXT: slli a1, a0, 11 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 4 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 32 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 64 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 128 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 256 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 512 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 1024 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 2048 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 4096 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 8192 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 16384 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 65536 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 131072 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: lui a1, 262144 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 31 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 33 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 34 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 35 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 36 +; RV64-NEXT: vmul.vv v20, v8, v20 
+; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 37 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 38 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 39 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 40 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 41 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 42 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 43 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 44 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 45 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 46 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 47 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 50 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 52 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 54 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 55 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 56 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 57 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 58 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 59 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 60 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: slli a1, a0, 61 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a1 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: slli a1, a1, 63 
+; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vand.vx v20, v12, a0 +; RV64-NEXT: vand.vx v12, v12, a1 +; RV64-NEXT: vmul.vv v20, v8, v20 +; RV64-NEXT: vxor.vv v16, v16, v20 +; RV64-NEXT: vmul.vv v8, v8, v12 +; RV64-NEXT: vxor.vv v8, v16, v8 +; RV64-NEXT: ret + %a = call <8 x i64> @llvm.clmul.v8i64(<8 x i64> %x, <8 x i64> %y) + ret <8 x i64> %a +} + +define <1 x i32> @clmulr_v1i32(<1 x i32> %x, <1 x i32> %y) nounwind { +; CHECK-LABEL: clmulr_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a4, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vsll.vi v11, v8, 24 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: lui a5, 349525 +; CHECK-NEXT: li a6, 16 +; CHECK-NEXT: addi a3, a4, -256 +; CHECK-NEXT: addi a2, a0, -241 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: addi a0, a5, 1365 +; CHECK-NEXT: vand.vx v9, v9, a3 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v11, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vand.vx v9, v8, a6 +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: vand.vx v10, v8, a5 +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vand.vx v11, v8, a5 +; CHECK-NEXT: li a5, 128 +; CHECK-NEXT: vand.vx v12, v8, a5 +; CHECK-NEXT: li a5, 256 +; CHECK-NEXT: vand.vx v13, v8, a5 +; CHECK-NEXT: li a5, 512 +; CHECK-NEXT: vand.vx v14, v8, a5 +; CHECK-NEXT: li a5, 1024 +; CHECK-NEXT: vand.vx v15, v8, a5 +; CHECK-NEXT: li a5, 1 +; CHECK-NEXT: slli a5, a5, 11 +; CHECK-NEXT: vand.vx v16, v8, a5 +; CHECK-NEXT: lui a5, 1 +; CHECK-NEXT: vand.vx v17, v8, a5 +; CHECK-NEXT: lui a5, 2 +; CHECK-NEXT: vand.vx v18, v8, a5 +; CHECK-NEXT: lui a5, 4 +; CHECK-NEXT: vand.vx v19, v8, a5 +; CHECK-NEXT: lui a5, 8 +; CHECK-NEXT: vand.vx v20, v8, a5 +; CHECK-NEXT: lui a5, 32 +; CHECK-NEXT: vand.vx v21, v8, a4 +; CHECK-NEXT: lui a4, 64 +; CHECK-NEXT: vand.vx v22, v8, a5 +; CHECK-NEXT: lui a5, 128 +; CHECK-NEXT: vand.vx v23, v8, a4 +; CHECK-NEXT: lui a4, 256 +; CHECK-NEXT: vand.vx v24, v8, a5 +; CHECK-NEXT: lui a5, 512 +; CHECK-NEXT: vand.vx v25, v8, a4 +; CHECK-NEXT: lui a4, 1024 +; CHECK-NEXT: vand.vx v26, v8, a5 +; CHECK-NEXT: lui a5, 2048 +; CHECK-NEXT: vand.vx v27, v8, a4 +; CHECK-NEXT: lui a4, 4096 +; CHECK-NEXT: vand.vx v28, v8, a5 +; CHECK-NEXT: lui a5, 8192 +; CHECK-NEXT: vand.vx v29, v8, a4 +; CHECK-NEXT: lui a4, 16384 +; CHECK-NEXT: vand.vx v30, v8, a5 +; CHECK-NEXT: lui a5, 32768 +; CHECK-NEXT: vand.vx v31, v8, a4 +; CHECK-NEXT: lui a4, 65536 +; CHECK-NEXT: vand.vx v7, v8, a5 +; CHECK-NEXT: lui a5, 131072 +; CHECK-NEXT: vand.vx v6, v8, a4 +; CHECK-NEXT: lui a4, 262144 +; CHECK-NEXT: vand.vx v5, v8, a5 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: vand.vi v4, v8, 2 +; CHECK-NEXT: vand.vi v3, v8, 1 +; CHECK-NEXT: vand.vi v2, v8, 4 +; CHECK-NEXT: vand.vi v1, v8, 8 +; CHECK-NEXT: vand.vx v0, v8, a4 +; CHECK-NEXT: 
vmul.vv v4, v8, v4 +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs1r.v v4, (a4) # vscale x 8-byte Folded Spill +; CHECK-NEXT: vmul.vv v3, v8, v3 +; CHECK-NEXT: vmul.vv v2, v8, v2 +; CHECK-NEXT: vmul.vv v1, v8, v1 +; CHECK-NEXT: vmul.vv v9, v8, v9 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vmul.vv v13, v8, v13 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vmul.vv v15, v8, v15 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vmul.vv v17, v8, v17 +; CHECK-NEXT: vmul.vv v18, v8, v18 +; CHECK-NEXT: vmul.vv v19, v8, v19 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vmul.vv v21, v8, v21 +; CHECK-NEXT: vmul.vv v22, v8, v22 +; CHECK-NEXT: vmul.vv v23, v8, v23 +; CHECK-NEXT: vmul.vv v24, v8, v24 +; CHECK-NEXT: vmul.vv v25, v8, v25 +; CHECK-NEXT: vmul.vv v26, v8, v26 +; CHECK-NEXT: vmul.vv v27, v8, v27 +; CHECK-NEXT: vmul.vv v28, v8, v28 +; CHECK-NEXT: vmul.vv v29, v8, v29 +; CHECK-NEXT: vmul.vv v30, v8, v30 +; CHECK-NEXT: vmul.vv v31, v8, v31 +; CHECK-NEXT: vmul.vv v7, v8, v7 +; CHECK-NEXT: vmul.vv v6, v8, v6 +; CHECK-NEXT: vmul.vv v5, v8, v5 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vand.vx v4, v8, a5 +; CHECK-NEXT: vmul.vv v8, v8, v4 +; CHECK-NEXT: vl1r.v v4, (a4) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vxor.vv v4, v3, v4 +; CHECK-NEXT: vxor.vv v4, v4, v2 +; CHECK-NEXT: vxor.vv v4, v4, v1 +; CHECK-NEXT: vxor.vv v9, v4, v9 +; CHECK-NEXT: vxor.vv v9, v9, v10 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vxor.vv v9, v9, v12 +; CHECK-NEXT: vxor.vv v9, v9, v13 +; CHECK-NEXT: vxor.vv v9, v9, v14 +; CHECK-NEXT: vxor.vv v9, v9, v15 +; CHECK-NEXT: vxor.vv v9, v9, v16 +; CHECK-NEXT: vxor.vv v9, v9, v17 +; CHECK-NEXT: vxor.vv v9, v9, v18 +; CHECK-NEXT: vxor.vv v9, v9, v19 +; CHECK-NEXT: vxor.vv v9, v9, v20 +; CHECK-NEXT: vxor.vv v9, v9, v21 +; CHECK-NEXT: vxor.vv v9, v9, v22 +; CHECK-NEXT: vxor.vv v9, v9, v23 +; CHECK-NEXT: vxor.vv v9, v9, v24 +; CHECK-NEXT: vxor.vv v9, v9, v25 +; CHECK-NEXT: vxor.vv v9, v9, v26 +; CHECK-NEXT: vxor.vv v9, v9, v27 +; CHECK-NEXT: vxor.vv v9, v9, v28 +; CHECK-NEXT: vxor.vv v9, v9, v29 +; CHECK-NEXT: vxor.vv v9, v9, v30 +; CHECK-NEXT: vxor.vv v9, v9, v31 +; CHECK-NEXT: vxor.vv v9, v9, v7 +; CHECK-NEXT: vxor.vv v9, v9, v6 +; CHECK-NEXT: vxor.vv v9, v9, v5 +; CHECK-NEXT: vxor.vv v9, v9, v0 +; CHECK-NEXT: vxor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vand.vx v9, v9, a3 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vsll.vi v10, v8, 24 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %a = call <1 x i32> @llvm.clmulr.v1i32(<1 x i32> %x, <1 x i32> %y) + ret <1 x i32> %a +} + +define <2 x i32> @clmulr_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind { +; CHECK-LABEL: clmulr_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a4, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vsll.vi v11, v8, 24 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: lui a5, 349525 +; CHECK-NEXT: li a6, 16 +; CHECK-NEXT: addi a3, a4, -256 +; CHECK-NEXT: addi a2, a0, -241 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: addi a0, a5, 1365 +; CHECK-NEXT: vand.vx v9, v9, a3 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v11, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vand.vx v9, v8, a6 +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: vand.vx v10, v8, a5 +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vand.vx v11, v8, a5 +; CHECK-NEXT: li a5, 128 +; CHECK-NEXT: vand.vx v12, v8, a5 +; CHECK-NEXT: li a5, 256 +; CHECK-NEXT: vand.vx v13, v8, a5 +; CHECK-NEXT: li a5, 512 +; CHECK-NEXT: vand.vx v14, v8, a5 +; CHECK-NEXT: li a5, 1024 +; CHECK-NEXT: vand.vx v15, v8, a5 +; CHECK-NEXT: li a5, 1 +; CHECK-NEXT: slli a5, a5, 11 +; CHECK-NEXT: vand.vx v16, v8, a5 +; CHECK-NEXT: lui a5, 1 +; CHECK-NEXT: vand.vx v17, v8, a5 +; CHECK-NEXT: lui a5, 2 +; CHECK-NEXT: vand.vx v18, v8, a5 +; CHECK-NEXT: lui a5, 4 +; CHECK-NEXT: vand.vx v19, v8, a5 +; CHECK-NEXT: lui a5, 8 +; CHECK-NEXT: vand.vx v20, v8, a5 +; CHECK-NEXT: lui a5, 32 +; CHECK-NEXT: vand.vx v21, v8, a4 +; CHECK-NEXT: lui a4, 64 +; CHECK-NEXT: vand.vx v22, v8, a5 +; CHECK-NEXT: lui a5, 128 +; CHECK-NEXT: vand.vx v23, v8, a4 +; CHECK-NEXT: lui a4, 256 +; CHECK-NEXT: vand.vx v24, v8, a5 +; CHECK-NEXT: lui a5, 512 +; CHECK-NEXT: vand.vx v25, v8, a4 +; CHECK-NEXT: lui a4, 1024 +; CHECK-NEXT: vand.vx v26, v8, a5 +; CHECK-NEXT: lui a5, 2048 +; CHECK-NEXT: vand.vx v27, v8, a4 +; CHECK-NEXT: lui a4, 4096 +; CHECK-NEXT: vand.vx v28, v8, a5 +; CHECK-NEXT: lui a5, 8192 +; CHECK-NEXT: vand.vx v29, v8, a4 +; CHECK-NEXT: lui a4, 16384 +; CHECK-NEXT: vand.vx v30, v8, a5 +; CHECK-NEXT: lui a5, 32768 +; CHECK-NEXT: vand.vx v31, v8, a4 +; CHECK-NEXT: lui a4, 65536 +; CHECK-NEXT: vand.vx v7, v8, a5 +; CHECK-NEXT: lui a5, 131072 +; CHECK-NEXT: vand.vx v6, v8, a4 +; CHECK-NEXT: lui a4, 262144 +; CHECK-NEXT: vand.vx v5, v8, a5 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: vand.vi v4, v8, 2 +; CHECK-NEXT: vand.vi v3, v8, 1 +; CHECK-NEXT: vand.vi v2, v8, 4 +; CHECK-NEXT: vand.vi v1, v8, 8 +; CHECK-NEXT: vand.vx v0, v8, a4 +; CHECK-NEXT: vmul.vv v4, v8, v4 +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs1r.v v4, (a4) # vscale x 8-byte Folded Spill +; CHECK-NEXT: vmul.vv v3, v8, v3 +; CHECK-NEXT: vmul.vv v2, v8, v2 +; CHECK-NEXT: vmul.vv v1, v8, v1 +; CHECK-NEXT: vmul.vv v9, v8, v9 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vmul.vv v13, v8, v13 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vmul.vv v15, v8, v15 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vmul.vv v17, v8, v17 +; CHECK-NEXT: 
vmul.vv v18, v8, v18 +; CHECK-NEXT: vmul.vv v19, v8, v19 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vmul.vv v21, v8, v21 +; CHECK-NEXT: vmul.vv v22, v8, v22 +; CHECK-NEXT: vmul.vv v23, v8, v23 +; CHECK-NEXT: vmul.vv v24, v8, v24 +; CHECK-NEXT: vmul.vv v25, v8, v25 +; CHECK-NEXT: vmul.vv v26, v8, v26 +; CHECK-NEXT: vmul.vv v27, v8, v27 +; CHECK-NEXT: vmul.vv v28, v8, v28 +; CHECK-NEXT: vmul.vv v29, v8, v29 +; CHECK-NEXT: vmul.vv v30, v8, v30 +; CHECK-NEXT: vmul.vv v31, v8, v31 +; CHECK-NEXT: vmul.vv v7, v8, v7 +; CHECK-NEXT: vmul.vv v6, v8, v6 +; CHECK-NEXT: vmul.vv v5, v8, v5 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vand.vx v4, v8, a5 +; CHECK-NEXT: vmul.vv v8, v8, v4 +; CHECK-NEXT: vl1r.v v4, (a4) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vxor.vv v4, v3, v4 +; CHECK-NEXT: vxor.vv v4, v4, v2 +; CHECK-NEXT: vxor.vv v4, v4, v1 +; CHECK-NEXT: vxor.vv v9, v4, v9 +; CHECK-NEXT: vxor.vv v9, v9, v10 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vxor.vv v9, v9, v12 +; CHECK-NEXT: vxor.vv v9, v9, v13 +; CHECK-NEXT: vxor.vv v9, v9, v14 +; CHECK-NEXT: vxor.vv v9, v9, v15 +; CHECK-NEXT: vxor.vv v9, v9, v16 +; CHECK-NEXT: vxor.vv v9, v9, v17 +; CHECK-NEXT: vxor.vv v9, v9, v18 +; CHECK-NEXT: vxor.vv v9, v9, v19 +; CHECK-NEXT: vxor.vv v9, v9, v20 +; CHECK-NEXT: vxor.vv v9, v9, v21 +; CHECK-NEXT: vxor.vv v9, v9, v22 +; CHECK-NEXT: vxor.vv v9, v9, v23 +; CHECK-NEXT: vxor.vv v9, v9, v24 +; CHECK-NEXT: vxor.vv v9, v9, v25 +; CHECK-NEXT: vxor.vv v9, v9, v26 +; CHECK-NEXT: vxor.vv v9, v9, v27 +; CHECK-NEXT: vxor.vv v9, v9, v28 +; CHECK-NEXT: vxor.vv v9, v9, v29 +; CHECK-NEXT: vxor.vv v9, v9, v30 +; CHECK-NEXT: vxor.vv v9, v9, v31 +; CHECK-NEXT: vxor.vv v9, v9, v7 +; CHECK-NEXT: vxor.vv v9, v9, v6 +; CHECK-NEXT: vxor.vv v9, v9, v5 +; CHECK-NEXT: vxor.vv v9, v9, v0 +; CHECK-NEXT: vxor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vand.vx v9, v9, a3 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vsll.vi v10, v8, 24 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %a = call <2 x i32> @llvm.clmulr.v2i32(<2 x i32> %x, <2 x i32> %y) + ret <2 x i32> %a +} + +define <4 x i32> @clmulr_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { +; CHECK-LABEL: clmulr_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: lui a4, 16 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vsll.vi v11, v8, 24 +; CHECK-NEXT: lui a0, 61681 +; CHECK-NEXT: lui a1, 209715 +; CHECK-NEXT: lui a5, 349525 +; CHECK-NEXT: li a6, 16 +; CHECK-NEXT: addi a3, a4, -256 +; CHECK-NEXT: addi a2, a0, -241 +; CHECK-NEXT: addi a1, a1, 819 +; CHECK-NEXT: addi a0, a5, 1365 +; CHECK-NEXT: vand.vx v9, v9, a3 +; CHECK-NEXT: vand.vx v8, v8, a3 +; 
CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v11, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vand.vx v9, v8, a6 +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: vand.vx v10, v8, a5 +; CHECK-NEXT: li a5, 64 +; CHECK-NEXT: vand.vx v11, v8, a5 +; CHECK-NEXT: li a5, 128 +; CHECK-NEXT: vand.vx v12, v8, a5 +; CHECK-NEXT: li a5, 256 +; CHECK-NEXT: vand.vx v13, v8, a5 +; CHECK-NEXT: li a5, 512 +; CHECK-NEXT: vand.vx v14, v8, a5 +; CHECK-NEXT: li a5, 1024 +; CHECK-NEXT: vand.vx v15, v8, a5 +; CHECK-NEXT: li a5, 1 +; CHECK-NEXT: slli a5, a5, 11 +; CHECK-NEXT: vand.vx v16, v8, a5 +; CHECK-NEXT: lui a5, 1 +; CHECK-NEXT: vand.vx v17, v8, a5 +; CHECK-NEXT: lui a5, 2 +; CHECK-NEXT: vand.vx v18, v8, a5 +; CHECK-NEXT: lui a5, 4 +; CHECK-NEXT: vand.vx v19, v8, a5 +; CHECK-NEXT: lui a5, 8 +; CHECK-NEXT: vand.vx v20, v8, a5 +; CHECK-NEXT: lui a5, 32 +; CHECK-NEXT: vand.vx v21, v8, a4 +; CHECK-NEXT: lui a4, 64 +; CHECK-NEXT: vand.vx v22, v8, a5 +; CHECK-NEXT: lui a5, 128 +; CHECK-NEXT: vand.vx v23, v8, a4 +; CHECK-NEXT: lui a4, 256 +; CHECK-NEXT: vand.vx v24, v8, a5 +; CHECK-NEXT: lui a5, 512 +; CHECK-NEXT: vand.vx v25, v8, a4 +; CHECK-NEXT: lui a4, 1024 +; CHECK-NEXT: vand.vx v26, v8, a5 +; CHECK-NEXT: lui a5, 2048 +; CHECK-NEXT: vand.vx v27, v8, a4 +; CHECK-NEXT: lui a4, 4096 +; CHECK-NEXT: vand.vx v28, v8, a5 +; CHECK-NEXT: lui a5, 8192 +; CHECK-NEXT: vand.vx v29, v8, a4 +; CHECK-NEXT: lui a4, 16384 +; CHECK-NEXT: vand.vx v30, v8, a5 +; CHECK-NEXT: lui a5, 32768 +; CHECK-NEXT: vand.vx v31, v8, a4 +; CHECK-NEXT: lui a4, 65536 +; CHECK-NEXT: vand.vx v7, v8, a5 +; CHECK-NEXT: lui a5, 131072 +; CHECK-NEXT: vand.vx v6, v8, a4 +; CHECK-NEXT: lui a4, 262144 +; CHECK-NEXT: vand.vx v5, v8, a5 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: vand.vi v4, v8, 2 +; CHECK-NEXT: vand.vi v3, v8, 1 +; CHECK-NEXT: vand.vi v2, v8, 4 +; CHECK-NEXT: vand.vi v1, v8, 8 +; CHECK-NEXT: vand.vx v0, v8, a4 +; CHECK-NEXT: vmul.vv v4, v8, v4 +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs1r.v v4, (a4) # vscale x 8-byte Folded Spill +; CHECK-NEXT: vmul.vv v3, v8, v3 +; CHECK-NEXT: vmul.vv v2, v8, v2 +; CHECK-NEXT: vmul.vv v1, v8, v1 +; CHECK-NEXT: vmul.vv v9, v8, v9 +; CHECK-NEXT: vmul.vv v10, v8, v10 +; CHECK-NEXT: vmul.vv v11, v8, v11 +; CHECK-NEXT: vmul.vv v12, v8, v12 +; CHECK-NEXT: vmul.vv v13, v8, v13 +; CHECK-NEXT: vmul.vv v14, v8, v14 +; CHECK-NEXT: vmul.vv v15, v8, v15 +; CHECK-NEXT: vmul.vv v16, v8, v16 +; CHECK-NEXT: vmul.vv v17, v8, v17 +; CHECK-NEXT: vmul.vv v18, v8, v18 +; CHECK-NEXT: vmul.vv v19, v8, v19 +; CHECK-NEXT: vmul.vv v20, v8, v20 +; CHECK-NEXT: vmul.vv v21, v8, v21 +; CHECK-NEXT: vmul.vv v22, v8, v22 +; CHECK-NEXT: vmul.vv v23, v8, v23 +; CHECK-NEXT: vmul.vv v24, v8, v24 +; CHECK-NEXT: vmul.vv v25, v8, v25 +; CHECK-NEXT: vmul.vv v26, v8, v26 +; CHECK-NEXT: vmul.vv v27, v8, v27 +; CHECK-NEXT: vmul.vv v28, v8, v28 +; CHECK-NEXT: vmul.vv v29, v8, v29 +; CHECK-NEXT: vmul.vv v30, v8, v30 +; CHECK-NEXT: vmul.vv v31, v8, v31 +; CHECK-NEXT: vmul.vv v7, v8, v7 +; CHECK-NEXT: vmul.vv v6, 
v8, v6 +; CHECK-NEXT: vmul.vv v5, v8, v5 +; CHECK-NEXT: vmul.vv v0, v8, v0 +; CHECK-NEXT: vand.vx v4, v8, a5 +; CHECK-NEXT: vmul.vv v8, v8, v4 +; CHECK-NEXT: vl1r.v v4, (a4) # vscale x 8-byte Folded Reload +; CHECK-NEXT: vxor.vv v4, v3, v4 +; CHECK-NEXT: vxor.vv v4, v4, v2 +; CHECK-NEXT: vxor.vv v4, v4, v1 +; CHECK-NEXT: vxor.vv v9, v4, v9 +; CHECK-NEXT: vxor.vv v9, v9, v10 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vxor.vv v9, v9, v12 +; CHECK-NEXT: vxor.vv v9, v9, v13 +; CHECK-NEXT: vxor.vv v9, v9, v14 +; CHECK-NEXT: vxor.vv v9, v9, v15 +; CHECK-NEXT: vxor.vv v9, v9, v16 +; CHECK-NEXT: vxor.vv v9, v9, v17 +; CHECK-NEXT: vxor.vv v9, v9, v18 +; CHECK-NEXT: vxor.vv v9, v9, v19 +; CHECK-NEXT: vxor.vv v9, v9, v20 +; CHECK-NEXT: vxor.vv v9, v9, v21 +; CHECK-NEXT: vxor.vv v9, v9, v22 +; CHECK-NEXT: vxor.vv v9, v9, v23 +; CHECK-NEXT: vxor.vv v9, v9, v24 +; CHECK-NEXT: vxor.vv v9, v9, v25 +; CHECK-NEXT: vxor.vv v9, v9, v26 +; CHECK-NEXT: vxor.vv v9, v9, v27 +; CHECK-NEXT: vxor.vv v9, v9, v28 +; CHECK-NEXT: vxor.vv v9, v9, v29 +; CHECK-NEXT: vxor.vv v9, v9, v30 +; CHECK-NEXT: vxor.vv v9, v9, v31 +; CHECK-NEXT: vxor.vv v9, v9, v7 +; CHECK-NEXT: vxor.vv v9, v9, v6 +; CHECK-NEXT: vxor.vv v9, v9, v5 +; CHECK-NEXT: vxor.vv v9, v9, v0 +; CHECK-NEXT: vxor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 8 +; CHECK-NEXT: vsrl.vi v10, v8, 24 +; CHECK-NEXT: vand.vx v9, v9, a3 +; CHECK-NEXT: vor.vv v9, v9, v10 +; CHECK-NEXT: vsll.vi v10, v8, 24 +; CHECK-NEXT: vand.vx v8, v8, a3 +; CHECK-NEXT: vsll.vi v8, v8, 8 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: vor.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vi v9, v8, 4 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsll.vi v8, v8, 4 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 2 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vsll.vi v8, v8, 2 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vsrl.vi v9, v8, 1 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %a = call <4 x i32> @llvm.clmulr.v4i32(<4 x i32> %x, <4 x i32> %y) + ret <4 x i32> %a +} + +define <8 x i32> @clmulr_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { +; RV32-LABEL: clmulr_v8i32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -64 +; RV32-NEXT: sw s0, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 52(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 48(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: lui a0, 16 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsll.vi v14, v8, 24 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: lui s6, 349525 +; RV32-NEXT: li t2, 16 +; RV32-NEXT: li t6, 32 +; RV32-NEXT: li s3, 64 +; RV32-NEXT: li s5, 128 +; RV32-NEXT: li s4, 256 +; RV32-NEXT: li s2, 512 +; RV32-NEXT: li s1, 1024 +; RV32-NEXT: li s0, 1 +; RV32-NEXT: lui t5, 1 +; RV32-NEXT: lui t4, 2 +; RV32-NEXT: lui t3, 4 +; RV32-NEXT: lui a5, 8 +; RV32-NEXT: lui a6, 32 +; RV32-NEXT: lui 
a7, 64 +; RV32-NEXT: lui t0, 128 +; RV32-NEXT: lui t1, 256 +; RV32-NEXT: addi a4, a0, -256 +; RV32-NEXT: addi a3, a1, -241 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: addi a1, s6, 1365 +; RV32-NEXT: vand.vx v10, v10, a4 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v14, v8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v10, v10, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vand.vx v10, v8, t2 +; RV32-NEXT: lui t2, 512 +; RV32-NEXT: vand.vx v12, v8, t6 +; RV32-NEXT: lui t6, 1024 +; RV32-NEXT: vand.vx v14, v8, s3 +; RV32-NEXT: lui s3, 2048 +; RV32-NEXT: vand.vx v16, v8, s5 +; RV32-NEXT: lui s5, 4096 +; RV32-NEXT: vand.vx v26, v8, s4 +; RV32-NEXT: lui s4, 8192 +; RV32-NEXT: vand.vx v28, v8, s2 +; RV32-NEXT: lui s2, 16384 +; RV32-NEXT: vand.vx v18, v8, s1 +; RV32-NEXT: lui s1, 32768 +; RV32-NEXT: slli s0, s0, 11 +; RV32-NEXT: vand.vx v20, v8, s0 +; RV32-NEXT: lui s0, 65536 +; RV32-NEXT: vand.vx v22, v8, t5 +; RV32-NEXT: lui t5, 131072 +; RV32-NEXT: vand.vx v24, v8, t4 +; RV32-NEXT: lui t4, 262144 +; RV32-NEXT: vand.vx v30, v8, t3 +; RV32-NEXT: lui t3, 524288 +; RV32-NEXT: vand.vi v6, v8, 2 +; RV32-NEXT: vand.vi v4, v8, 1 +; RV32-NEXT: vand.vi v2, v8, 4 +; RV32-NEXT: vand.vi v0, v8, 8 +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v6, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v6, v8, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v6, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: 
add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv s6, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, s6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v30 +; RV32-NEXT: csrr s6, vlenb +; RV32-NEXT: slli s6, s6, 1 +; RV32-NEXT: mv a0, s6 +; RV32-NEXT: slli s6, s6, 1 +; RV32-NEXT: add s6, s6, a0 +; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add s6, sp, s6 +; RV32-NEXT: addi s6, s6, 32 +; RV32-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 32 +; RV32-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a6 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a7 +; RV32-NEXT: vmul.vv v6, v8, v10 +; RV32-NEXT: 
vand.vx v10, v8, t0 +; RV32-NEXT: vmul.vv v30, v8, v10 +; RV32-NEXT: vand.vx v10, v8, t1 +; RV32-NEXT: vmul.vv v28, v8, v10 +; RV32-NEXT: vand.vx v10, v8, t2 +; RV32-NEXT: vmul.vv v26, v8, v10 +; RV32-NEXT: vand.vx v10, v8, t6 +; RV32-NEXT: vmul.vv v24, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s3 +; RV32-NEXT: vmul.vv v22, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s5 +; RV32-NEXT: vmul.vv v20, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s4 +; RV32-NEXT: vmul.vv v18, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s2 +; RV32-NEXT: vmul.vv v16, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s1 +; RV32-NEXT: vmul.vv v14, v8, v10 +; RV32-NEXT: vand.vx v10, v8, s0 +; RV32-NEXT: vmul.vv v12, v8, v10 +; RV32-NEXT: vand.vx v10, v8, t5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: vand.vx v0, v8, t4 +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: vand.vx v2, v8, t3 +; RV32-NEXT: vmul.vv v8, v8, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v4, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; 
RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v2, v2, v4 +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v4, v2, v4 +; RV32-NEXT: vxor.vv v6, v4, v6 +; RV32-NEXT: vxor.vv v30, v6, v30 +; RV32-NEXT: vxor.vv v28, v30, v28 +; RV32-NEXT: vxor.vv v26, v28, v26 +; RV32-NEXT: vxor.vv v24, v26, v24 +; RV32-NEXT: vxor.vv v22, v24, v22 +; RV32-NEXT: vxor.vv v20, v22, v20 +; RV32-NEXT: vxor.vv v18, v20, v18 +; RV32-NEXT: vxor.vv v16, v18, v16 +; RV32-NEXT: vxor.vv v14, v16, v14 +; RV32-NEXT: vxor.vv v12, v14, v12 +; RV32-NEXT: vxor.vv v10, v12, v10 +; RV32-NEXT: vxor.vv v10, v10, v0 +; RV32-NEXT: vxor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vand.vx v10, v10, a4 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsll.vi v12, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v10, v10, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: vand.vx 
v8, v8, a1 +; RV32-NEXT: vand.vx v10, v10, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw s0, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 52(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 48(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 64 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_v8i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -96 +; RV64-NEXT: sd s0, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vsrl.vi v10, v8, 8 +; RV64-NEXT: lui a0, 16 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: vsll.vi v14, v8, 24 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui s6, 349525 +; RV64-NEXT: li t2, 16 +; RV64-NEXT: li t6, 32 +; RV64-NEXT: li s3, 64 +; RV64-NEXT: li s5, 128 +; RV64-NEXT: li s4, 256 +; RV64-NEXT: li s2, 512 +; RV64-NEXT: li s1, 1024 +; RV64-NEXT: li s0, 1 +; RV64-NEXT: lui t5, 1 +; RV64-NEXT: lui t4, 2 +; RV64-NEXT: lui t3, 4 +; RV64-NEXT: lui a5, 8 +; RV64-NEXT: lui a6, 32 +; RV64-NEXT: lui a7, 64 +; RV64-NEXT: lui t0, 128 +; RV64-NEXT: lui t1, 256 +; RV64-NEXT: addi a4, a0, -256 +; RV64-NEXT: addi a3, a1, -241 +; RV64-NEXT: addi a2, a2, 819 +; RV64-NEXT: addi a1, s6, 1365 +; RV64-NEXT: vand.vx v10, v10, a4 +; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vor.vv v10, v10, v12 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v10, v10, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vand.vx v10, v8, t2 +; RV64-NEXT: lui t2, 512 +; RV64-NEXT: vand.vx v12, v8, t6 +; RV64-NEXT: lui t6, 1024 +; RV64-NEXT: vand.vx v14, v8, s3 +; RV64-NEXT: lui s3, 2048 +; RV64-NEXT: vand.vx v16, v8, s5 +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: vand.vx v26, v8, s4 +; RV64-NEXT: lui s4, 8192 +; RV64-NEXT: vand.vx v28, v8, s2 +; RV64-NEXT: lui s2, 16384 +; RV64-NEXT: vand.vx v18, v8, s1 +; RV64-NEXT: lui s1, 32768 +; RV64-NEXT: slli s0, s0, 11 +; RV64-NEXT: vand.vx v20, v8, s0 +; RV64-NEXT: lui s0, 65536 +; RV64-NEXT: vand.vx v22, v8, t5 +; RV64-NEXT: lui t5, 131072 +; RV64-NEXT: vand.vx v24, v8, t4 +; RV64-NEXT: lui t4, 262144 +; RV64-NEXT: vand.vx v30, v8, t3 +; RV64-NEXT: lui t3, 524288 +; RV64-NEXT: vand.vi v6, v8, 2 +; RV64-NEXT: vand.vi v4, v8, 1 +; RV64-NEXT: 
vand.vi v2, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v6, v8, v6 +; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v14 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v26 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v18 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: 
vmul.vv v10, v8, v20 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v22 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv s6, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, s6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v24 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v30 +; RV64-NEXT: csrr s6, vlenb +; RV64-NEXT: slli s6, s6, 1 +; RV64-NEXT: mv a0, s6 +; RV64-NEXT: slli s6, s6, 1 +; RV64-NEXT: add s6, s6, a0 +; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s6, sp, s6 +; RV64-NEXT: addi s6, s6, 32 +; RV64-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, a5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, a0 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, a6 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, a7 +; RV64-NEXT: vmul.vv v6, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t0 +; RV64-NEXT: vmul.vv v30, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t1 +; RV64-NEXT: vmul.vv v28, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t2 +; RV64-NEXT: vmul.vv v26, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t6 +; RV64-NEXT: vmul.vv v24, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s3 +; RV64-NEXT: vmul.vv v22, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v20, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s4 +; RV64-NEXT: vmul.vv v18, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s2 +; RV64-NEXT: vmul.vv v16, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s1 +; RV64-NEXT: vmul.vv v14, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s0 +; RV64-NEXT: vmul.vv v12, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: vand.vx v0, v8, t4 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vand.vx v2, v8, t3 +; RV64-NEXT: vmul.vv v8, v8, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v4, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; 
RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; 
RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v2, v4 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v4, v2, v4 +; RV64-NEXT: vxor.vv v6, v4, v6 +; RV64-NEXT: vxor.vv v30, v6, v30 +; RV64-NEXT: vxor.vv v28, v30, v28 +; RV64-NEXT: vxor.vv v26, v28, v26 +; RV64-NEXT: vxor.vv v24, v26, v24 +; RV64-NEXT: vxor.vv v22, v24, v22 +; RV64-NEXT: vxor.vv v20, v22, v20 +; RV64-NEXT: vxor.vv v18, v20, v18 +; RV64-NEXT: vxor.vv v16, v18, v16 +; RV64-NEXT: vxor.vv v14, v16, v14 +; RV64-NEXT: vxor.vv v12, v14, v12 +; RV64-NEXT: vxor.vv v10, v12, v10 +; RV64-NEXT: vxor.vv v10, v10, v0 +; RV64-NEXT: vxor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 8 +; RV64-NEXT: vsrl.vi v12, v8, 24 +; RV64-NEXT: vand.vx v10, v10, a4 +; RV64-NEXT: vor.vv v10, v10, v12 +; RV64-NEXT: vsll.vi v12, v8, 24 +; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v10, v10, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v10, v10, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v10, v10, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld s0, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 96 +; RV64-NEXT: ret + %a = call <8 x i32> @llvm.clmulr.v8i32(<8 x i32> %x, <8 x i32> %y) + ret <8 x i32> %a +} + +define <16 x i32> @clmulr_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { +; RV32-LABEL: clmulr_v16i32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -80 +; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, 
a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: lui a5, 16 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsll.vi v20, v8, 24 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: lui ra, 349525 +; RV32-NEXT: li s11, 16 +; RV32-NEXT: li s10, 32 +; RV32-NEXT: li s9, 64 +; RV32-NEXT: li a7, 512 +; RV32-NEXT: li t0, 1024 +; RV32-NEXT: li a0, 1 +; RV32-NEXT: lui t1, 1 +; RV32-NEXT: lui t2, 2 +; RV32-NEXT: lui t3, 4 +; RV32-NEXT: lui t4, 8 +; RV32-NEXT: lui t5, 32 +; RV32-NEXT: lui t6, 64 +; RV32-NEXT: lui s0, 128 +; RV32-NEXT: lui s1, 256 +; RV32-NEXT: lui s2, 512 +; RV32-NEXT: lui s3, 1024 +; RV32-NEXT: lui s4, 2048 +; RV32-NEXT: lui s5, 4096 +; RV32-NEXT: lui s6, 8192 +; RV32-NEXT: lui s7, 16384 +; RV32-NEXT: lui s8, 32768 +; RV32-NEXT: addi a4, a5, -256 +; RV32-NEXT: addi a3, a1, -241 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: addi a1, ra, 1365 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsrl.vi v12, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v12, v12, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vand.vx v12, v8, s11 +; RV32-NEXT: lui s11, 65536 +; RV32-NEXT: vand.vx v16, v8, s10 +; RV32-NEXT: lui s10, 131072 +; RV32-NEXT: vand.vx v20, v8, s9 +; RV32-NEXT: lui s9, 262144 +; RV32-NEXT: slli ra, a0, 11 +; RV32-NEXT: vand.vi v24, v8, 2 +; RV32-NEXT: vand.vi v28, v8, 1 +; RV32-NEXT: vand.vi v4, v8, 4 +; RV32-NEXT: vand.vi v0, v8, 8 +; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: sw a4, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v24, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v24, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v24, 
(a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a4, a4, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: lui a0, 524288 +; RV32-NEXT: li a6, 128 +; RV32-NEXT: vand.vx v12, v8, a6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: mv a6, a4 +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a6, 256 +; RV32-NEXT: vand.vx v12, v8, a6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv a4, a6 +; RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: add a6, a6, a4 +; RV32-NEXT: lw a4, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, a7 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 6 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, ra +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; 
RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 1 +; RV32-NEXT: add a7, a7, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: slli a6, a6, 2 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, a5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 5 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a6, a6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: mv a6, a5 +; RV32-NEXT: slli a5, a5, 1 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 2 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s6 
+; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s7 +; RV32-NEXT: vmul.vv v28, v8, v12 +; RV32-NEXT: vand.vx v12, v8, s8 +; RV32-NEXT: vmul.vv v24, v8, v12 +; RV32-NEXT: vand.vx v12, v8, s11 +; RV32-NEXT: vmul.vv v20, v8, v12 +; RV32-NEXT: vand.vx v12, v8, s10 +; RV32-NEXT: vmul.vv v16, v8, v12 +; RV32-NEXT: vand.vx v12, v8, s9 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: vand.vx v0, v8, a0 +; RV32-NEXT: vmul.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v4, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: 
slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a5, a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 
+; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v0, v0, v4 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v4, v0, v4 +; RV32-NEXT: vxor.vv v28, v4, v28 +; RV32-NEXT: vxor.vv v24, v28, v24 +; RV32-NEXT: vxor.vv v20, v24, v20 +; RV32-NEXT: vxor.vv v16, v20, v16 +; RV32-NEXT: vxor.vv v12, v16, v12 +; RV32-NEXT: vxor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsll.vi v16, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsrl.vi v12, v8, 4 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 2 +; RV32-NEXT: vand.vx v8, v8, a2 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 1 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vand.vx v12, v12, a1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 80 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_v16i32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -144 +; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 
88(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vsrl.vi v12, v8, 8 +; RV64-NEXT: lui a5, 16 +; RV64-NEXT: vsrl.vi v16, v8, 24 +; RV64-NEXT: vsll.vi v20, v8, 24 +; RV64-NEXT: lui a1, 61681 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui ra, 349525 +; RV64-NEXT: li s11, 16 +; RV64-NEXT: li s10, 32 +; RV64-NEXT: li s9, 64 +; RV64-NEXT: li a7, 512 +; RV64-NEXT: li t0, 1024 +; RV64-NEXT: li a0, 1 +; RV64-NEXT: lui t1, 1 +; RV64-NEXT: lui t2, 2 +; RV64-NEXT: lui t3, 4 +; RV64-NEXT: lui t4, 8 +; RV64-NEXT: lui t5, 32 +; RV64-NEXT: lui t6, 64 +; RV64-NEXT: lui s0, 128 +; RV64-NEXT: lui s1, 256 +; RV64-NEXT: lui s2, 512 +; RV64-NEXT: lui s3, 1024 +; RV64-NEXT: lui s4, 2048 +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: lui s6, 8192 +; RV64-NEXT: lui s7, 16384 +; RV64-NEXT: lui s8, 32768 +; RV64-NEXT: addi a4, a5, -256 +; RV64-NEXT: addi a3, a1, -241 +; RV64-NEXT: addi a2, a2, 819 +; RV64-NEXT: addi a1, ra, 1365 +; RV64-NEXT: vand.vx v12, v12, a4 +; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vor.vv v12, v12, v16 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v20, v8 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v12, v12, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v12, v12, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vand.vx v12, v8, s11 +; RV64-NEXT: lui s11, 65536 +; RV64-NEXT: vand.vx v16, v8, s10 +; RV64-NEXT: lui s10, 131072 +; RV64-NEXT: vand.vx v20, v8, s9 +; RV64-NEXT: lui s9, 262144 +; RV64-NEXT: slli ra, a0, 11 +; RV64-NEXT: vand.vi v24, v8, 2 +; RV64-NEXT: vand.vi v28, v8, 1 +; RV64-NEXT: vand.vi v4, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v24, v8, v24 +; RV64-NEXT: sd a4, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v28 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 
+; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v20 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a4, a4, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui a0, 524288 +; RV64-NEXT: li a6, 128 +; RV64-NEXT: vand.vx v12, v8, a6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 3 +; RV64-NEXT: mv a6, a4 +; RV64-NEXT: slli a4, a4, 3 +; RV64-NEXT: add a4, a4, a6 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill +; RV64-NEXT: li a6, 256 +; RV64-NEXT: vand.vx v12, v8, a6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: mv a4, a6 +; RV64-NEXT: slli a6, a6, 4 +; RV64-NEXT: add a6, a6, a4 +; RV64-NEXT: ld a4, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a7 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 6 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t0 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, ra +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: 
slli a6, a6, 1 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 4 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 1 +; RV64-NEXT: add a7, a7, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t4 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: mv a7, a6 +; RV64-NEXT: slli a6, a6, 2 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 32 +; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a6, a6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s0 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv a6, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s4 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 
+; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: addi a5, sp, 32 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s7 +; RV64-NEXT: vmul.vv v28, v8, v12 +; RV64-NEXT: vand.vx v12, v8, s8 +; RV64-NEXT: vmul.vv v24, v8, v12 +; RV64-NEXT: vand.vx v12, v8, s11 +; RV64-NEXT: vmul.vv v20, v8, v12 +; RV64-NEXT: vand.vx v12, v8, s10 +; RV64-NEXT: vmul.vv v16, v8, v12 +; RV64-NEXT: vand.vx v12, v8, s9 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: vand.vx v0, v8, a0 +; RV64-NEXT: vmul.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v4, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) 
# vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a5, a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 
+; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v4, v0, v4 +; RV64-NEXT: vxor.vv v28, v4, v28 +; RV64-NEXT: vxor.vv v24, v28, v24 +; RV64-NEXT: vxor.vv v20, v24, v20 +; RV64-NEXT: vxor.vv v16, v20, v16 +; RV64-NEXT: vxor.vv v12, v16, v12 +; RV64-NEXT: vxor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 8 +; RV64-NEXT: vsrl.vi v16, v8, 24 +; RV64-NEXT: vand.vx v12, v12, a4 +; RV64-NEXT: vor.vv v12, v12, v16 +; RV64-NEXT: vsll.vi v16, v8, 24 +; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vand.vx v12, v12, a3 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vand.vx v12, v12, a2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v12, v12, a1 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 144 +; RV64-NEXT: ret + 
%a = call <16 x i32> @llvm.clmulr.v16i32(<16 x i32> %x, <16 x i32> %y) + ret <16 x i32> %a +} + +define <1 x i64> @clmulr_v1i64(<1 x i64> %x, <1 x i64> %y) nounwind { +; RV32-LABEL: clmulr_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui s7, 1044480 +; RV32-NEXT: lui a7, 524288 +; RV32-NEXT: li s11, 1 +; RV32-NEXT: li s8, 2 +; RV32-NEXT: li s9, 4 +; RV32-NEXT: li s10, 8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: li a5, 64 +; RV32-NEXT: li a6, 128 +; RV32-NEXT: li ra, 256 +; RV32-NEXT: li a0, 512 +; RV32-NEXT: li a1, 1024 +; RV32-NEXT: lui a2, 1 +; RV32-NEXT: lui t0, 2 +; RV32-NEXT: lui t1, 4 +; RV32-NEXT: lui t2, 8 +; RV32-NEXT: lui t3, 16 +; RV32-NEXT: lui t4, 32 +; RV32-NEXT: lui t5, 64 +; RV32-NEXT: lui t6, 128 +; RV32-NEXT: lui s0, 256 +; RV32-NEXT: lui s1, 512 +; RV32-NEXT: lui s2, 1024 +; RV32-NEXT: lui s3, 2048 +; RV32-NEXT: lui s4, 4096 +; RV32-NEXT: lui s5, 8192 +; RV32-NEXT: lui s6, 16384 +; RV32-NEXT: sw s7, 272(sp) +; RV32-NEXT: lui s7, 32768 +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw a7, 264(sp) +; RV32-NEXT: sw zero, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw s11, 260(sp) +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw s8, 252(sp) +; RV32-NEXT: lui s8, 65536 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s9, 244(sp) +; RV32-NEXT: lui s9, 131072 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw s10, 236(sp) +; RV32-NEXT: lui s10, 262144 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw a3, 228(sp) +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw a4, 220(sp) +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw a5, 212(sp) +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw a6, 204(sp) +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw ra, 196(sp) +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw a0, 188(sp) +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a1, 180(sp) +; RV32-NEXT: slli s11, s11, 11 +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw s11, 172(sp) +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw a2, 164(sp) +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw t0, 156(sp) +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw t1, 148(sp) +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw t2, 140(sp) +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw t3, 132(sp) +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t4, 124(sp) +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t5, 116(sp) +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t6, 108(sp) +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw s0, 100(sp) +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw s1, 92(sp) +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw s2, 84(sp) +; RV32-NEXT: sw zero, 72(sp) 
+; RV32-NEXT: sw s3, 76(sp) +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw s4, 68(sp) +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw s5, 60(sp) +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw s6, 52(sp) +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw s7, 44(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw s8, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: sw s9, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw s10, 20(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw a7, 12(sp) +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v3, a0 +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vmv.v.x v2, a0 +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vmv.v.x v1, a0 +; RV32-NEXT: addi a0, sp, 272 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v0, (a0), zero +; RV32-NEXT: addi a0, sp, 264 +; RV32-NEXT: vlse64.v v13, (a0), zero +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vlse64.v v14, (a0), zero +; RV32-NEXT: addi a0, sp, 248 +; RV32-NEXT: vlse64.v v15, (a0), zero +; RV32-NEXT: addi a0, sp, 240 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: addi a0, sp, 232 +; RV32-NEXT: vlse64.v v17, (a0), zero +; RV32-NEXT: addi a0, sp, 224 +; RV32-NEXT: vlse64.v v18, (a0), zero +; RV32-NEXT: addi a0, sp, 216 +; RV32-NEXT: vlse64.v v19, (a0), zero +; RV32-NEXT: addi a0, sp, 208 +; RV32-NEXT: vlse64.v v20, (a0), zero +; RV32-NEXT: addi a0, sp, 200 +; RV32-NEXT: vlse64.v v21, (a0), zero +; RV32-NEXT: addi a0, sp, 192 +; RV32-NEXT: vlse64.v v22, (a0), zero +; RV32-NEXT: addi a0, sp, 184 +; RV32-NEXT: vlse64.v v23, (a0), zero +; RV32-NEXT: addi a0, sp, 176 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: addi a0, sp, 168 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: addi a0, sp, 160 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: addi a0, sp, 152 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: addi a0, sp, 144 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: addi a0, sp, 136 +; RV32-NEXT: vlse64.v v29, (a0), zero +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vlse64.v v30, (a0), zero +; RV32-NEXT: addi a0, sp, 120 +; RV32-NEXT: vlse64.v v31, (a0), zero +; RV32-NEXT: addi a0, sp, 112 +; RV32-NEXT: vlse64.v v11, (a0), zero +; RV32-NEXT: addi a0, sp, 104 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: addi a0, sp, 96 +; RV32-NEXT: vlse64.v v5, (a0), zero +; RV32-NEXT: addi a0, sp, 88 +; RV32-NEXT: vlse64.v v4, (a0), zero +; RV32-NEXT: li a6, 56 +; RV32-NEXT: vsrl.vi v27, v8, 24 +; RV32-NEXT: vsrl.vx v28, v8, a6 +; RV32-NEXT: li ra, 40 +; RV32-NEXT: vsrl.vx v7, v8, ra +; RV32-NEXT: vsll.vx v6, v8, a6 +; RV32-NEXT: addi a4, t3, -256 +; RV32-NEXT: vand.vx v7, v7, a4 +; RV32-NEXT: vor.vv v28, v7, v28 +; RV32-NEXT: vand.vx v7, v8, a4 +; RV32-NEXT: vsll.vx v7, v7, ra +; RV32-NEXT: vor.vv v7, v6, v7 +; RV32-NEXT: vsrl.vi v6, v8, 8 +; RV32-NEXT: lui a5, 4080 +; RV32-NEXT: vand.vx v27, v27, a5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v6, v6, v0 +; RV32-NEXT: vor.vv v27, v6, v27 +; RV32-NEXT: addi a3, sp, 80 +; RV32-NEXT: vlse64.v v6, (a3), zero +; RV32-NEXT: vor.vv v27, v27, v28 +; RV32-NEXT: vand.vx v28, v8, a5 +; RV32-NEXT: vsll.vi v28, 
v28, 24 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v28, v8 +; RV32-NEXT: addi a3, sp, 72 +; RV32-NEXT: vlse64.v v28, (a3), zero +; RV32-NEXT: vor.vv v8, v7, v8 +; RV32-NEXT: addi a3, sp, 64 +; RV32-NEXT: vlse64.v v7, (a3), zero +; RV32-NEXT: vor.vv v8, v8, v27 +; RV32-NEXT: vsrl.vi v27, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v3 +; RV32-NEXT: vand.vv v27, v27, v3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v27, v8 +; RV32-NEXT: vsrl.vi v27, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v2 +; RV32-NEXT: vand.vv v27, v27, v2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v27, v8 +; RV32-NEXT: vsrl.vi v27, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v1 +; RV32-NEXT: vand.vv v27, v27, v1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v27, v8 +; RV32-NEXT: addi a3, sp, 56 +; RV32-NEXT: vlse64.v v27, (a3), zero +; RV32-NEXT: vand.vv v13, v8, v13 +; RV32-NEXT: vand.vv v14, v8, v14 +; RV32-NEXT: vand.vv v15, v8, v15 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vv v17, v8, v17 +; RV32-NEXT: vand.vv v18, v8, v18 +; RV32-NEXT: vand.vv v19, v8, v19 +; RV32-NEXT: vand.vv v20, v8, v20 +; RV32-NEXT: vand.vv v21, v8, v21 +; RV32-NEXT: vand.vv v22, v8, v22 +; RV32-NEXT: vand.vv v23, v8, v23 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vand.vv v25, v8, v25 +; RV32-NEXT: vand.vv v26, v8, v26 +; RV32-NEXT: vand.vv v3, v8, v9 +; RV32-NEXT: vand.vv v2, v8, v10 +; RV32-NEXT: vand.vv v29, v8, v29 +; RV32-NEXT: vand.vv v30, v8, v30 +; RV32-NEXT: vand.vv v31, v8, v31 +; RV32-NEXT: vand.vv v0, v8, v11 +; RV32-NEXT: vand.vv v9, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v5, v8, v5 +; RV32-NEXT: vand.vv v4, v8, v4 +; RV32-NEXT: vand.vv v6, v8, v6 +; RV32-NEXT: vand.vv v9, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: addi a0, sp, 40 +; RV32-NEXT: vlse64.v v9, (a3), zero +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vand.vv v11, v8, v7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, 
a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v11, v8, v27 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a2), zero +; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: vlse64.v v11, (a1), zero +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vand.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: 
add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 8 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 16 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 64 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 128 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 256 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 512 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 1024 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s11 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: 
addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t3 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t5 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t6 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s3 +; RV32-NEXT: vmul.vv v9, v8, 
v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s5 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s6 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s7 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s8 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s9 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v1, v8, s10 +; RV32-NEXT: vmul.vv v1, v8, v1 +; RV32-NEXT: vmul.vv v9, v8, v13 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v14 +; RV32-NEXT: vmul.vv v11, v8, v15 +; RV32-NEXT: vmul.vv v12, v8, v16 +; RV32-NEXT: vmul.vv v13, v8, v17 +; RV32-NEXT: vmul.vv v14, v8, v18 +; RV32-NEXT: vmul.vv v15, v8, v19 +; RV32-NEXT: vmul.vv v16, v8, v20 +; RV32-NEXT: vmul.vv v17, v8, v21 +; RV32-NEXT: vmul.vv v18, v8, v22 +; RV32-NEXT: vmul.vv v19, v8, v23 +; RV32-NEXT: vmul.vv v20, v8, v24 +; RV32-NEXT: vmul.vv v21, v8, v25 +; RV32-NEXT: vmul.vv v22, v8, v26 +; RV32-NEXT: vmul.vv v23, v8, v3 +; RV32-NEXT: vmul.vv v24, v8, v2 +; RV32-NEXT: vmul.vv v25, v8, v29 +; RV32-NEXT: vmul.vv v26, v8, v30 +; RV32-NEXT: vmul.vv v27, v8, v31 +; RV32-NEXT: vmul.vv v28, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v29, v8, v29 +; RV32-NEXT: vmul.vv v30, v8, v5 +; RV32-NEXT: vmul.vv v31, v8, v4 +; RV32-NEXT: vmul.vv v7, v8, v6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: 
add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v5, v8, v5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v3, v8, v3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded 
Reload +; RV32-NEXT: vxor.vi v8, v8, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add 
a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 
1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vxor.vv v8, v8, v1 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vxor.vv v8, v8, v11 +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v8, v13 +; RV32-NEXT: vxor.vv v8, v8, v14 +; RV32-NEXT: vxor.vv v8, v8, v15 +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v8, v17 +; RV32-NEXT: vxor.vv v8, v8, v18 +; RV32-NEXT: vxor.vv v8, v8, v19 +; RV32-NEXT: vxor.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v8, v21 +; RV32-NEXT: vxor.vv v8, v8, v22 +; RV32-NEXT: vxor.vv v8, v8, v23 +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vxor.vv v8, v8, v25 +; RV32-NEXT: vxor.vv v8, v8, v26 +; RV32-NEXT: vxor.vv v8, v8, v27 +; RV32-NEXT: vxor.vv v8, v8, v28 +; RV32-NEXT: vxor.vv v8, v8, v29 +; RV32-NEXT: vxor.vv v8, v8, v30 +; RV32-NEXT: vxor.vv v8, v8, v31 +; RV32-NEXT: vxor.vv v8, v8, v7 +; RV32-NEXT: vxor.vv v8, v8, v6 +; RV32-NEXT: vxor.vv v8, v8, v5 +; RV32-NEXT: vxor.vv v8, v8, v4 +; RV32-NEXT: vxor.vv v8, v8, v3 +; RV32-NEXT: vxor.vv v8, v8, v2 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vx v9, v8, a6 +; RV32-NEXT: vsll.vx v10, v8, a6 +; RV32-NEXT: vsrl.vx v11, v8, ra +; 
RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vand.vx v11, v11, a4 +; RV32-NEXT: vsrl.vi v13, v8, 24 +; RV32-NEXT: vand.vx v14, v8, a5 +; RV32-NEXT: vand.vx v13, v13, a5 +; RV32-NEXT: vsll.vx v12, v12, ra +; RV32-NEXT: vsrl.vi v15, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v15, v15, v16 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vor.vv v11, v15, v13 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vsll.vi v13, v14, 24 +; RV32-NEXT: vor.vv v8, v13, v8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vi v9, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -224 +; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill +; 
RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: li s11, 56 +; RV64-NEXT: li ra, 40 +; RV64-NEXT: lui a0, 16 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: vsrl.vi v9, v8, 8 +; RV64-NEXT: li t2, 255 +; RV64-NEXT: lui t6, 61681 +; RV64-NEXT: lui s0, 209715 +; RV64-NEXT: lui s1, 349525 +; RV64-NEXT: li s10, 16 +; RV64-NEXT: li s9, 32 +; RV64-NEXT: li s8, 64 +; RV64-NEXT: li s7, 128 +; RV64-NEXT: li s5, 256 +; RV64-NEXT: li t5, 512 +; RV64-NEXT: li t3, 1024 +; RV64-NEXT: li t0, 1 +; RV64-NEXT: lui s6, 1 +; RV64-NEXT: lui s4, 2 +; RV64-NEXT: lui t4, 4 +; RV64-NEXT: lui t1, 8 +; RV64-NEXT: lui a7, 32 +; RV64-NEXT: lui a6, 64 +; RV64-NEXT: lui a5, 128 +; RV64-NEXT: lui a4, 256 +; RV64-NEXT: lui a3, 512 +; RV64-NEXT: lui a2, 1024 +; RV64-NEXT: vsrl.vx v11, v8, s11 +; RV64-NEXT: vsrl.vx v12, v8, ra +; RV64-NEXT: addi t6, t6, -241 +; RV64-NEXT: addi s2, s0, 819 +; RV64-NEXT: addi s3, s1, 1365 +; RV64-NEXT: slli s1, t6, 32 +; RV64-NEXT: add s1, t6, s1 +; RV64-NEXT: slli t6, s2, 32 +; RV64-NEXT: add s2, s2, t6 +; RV64-NEXT: slli t6, s3, 32 +; RV64-NEXT: add s3, s3, t6 +; RV64-NEXT: addi s0, a0, -256 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v10, v10, a0 +; RV64-NEXT: slli t6, t2, 24 +; RV64-NEXT: vand.vx v13, v8, a0 +; RV64-NEXT: vsll.vx v14, v8, s11 +; RV64-NEXT: vand.vx v12, v12, s0 +; RV64-NEXT: vand.vx v9, v9, t6 +; RV64-NEXT: vsll.vi v13, v13, 24 +; RV64-NEXT: vand.vx v15, v8, t6 +; RV64-NEXT: vand.vx v8, v8, s0 +; RV64-NEXT: vor.vv v11, v12, v11 +; RV64-NEXT: vor.vv v9, v9, v10 +; RV64-NEXT: vsll.vi v10, v15, 8 +; RV64-NEXT: vsll.vx v8, v8, ra +; RV64-NEXT: vor.vv v9, v9, v11 +; RV64-NEXT: vor.vv v10, v13, v10 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: vand.vx v8, v8, s1 +; RV64-NEXT: vand.vx v9, v9, s1 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: vand.vx v8, v8, s2 +; RV64-NEXT: vand.vx v9, v9, s2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vand.vx v8, v8, s3 +; RV64-NEXT: vand.vx v9, v9, s3 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vand.vx v9, v8, s10 +; RV64-NEXT: lui t2, 4096 +; RV64-NEXT: vand.vx v10, v8, s9 +; RV64-NEXT: lui s9, 8192 +; RV64-NEXT: vand.vx v11, v8, s8 +; RV64-NEXT: lui s8, 16384 +; RV64-NEXT: vand.vx v12, v8, s7 +; RV64-NEXT: lui s10, 32768 +; RV64-NEXT: vand.vx v13, v8, s5 +; RV64-NEXT: lui s11, 65536 +; RV64-NEXT: vand.vx v14, v8, t5 +; RV64-NEXT: lui t5, 131072 +; RV64-NEXT: vand.vx v15, v8, t3 +; RV64-NEXT: slli t3, t0, 11 +; RV64-NEXT: vand.vx v16, v8, t3 +; RV64-NEXT: lui t3, 262144 +; RV64-NEXT: vand.vx v17, v8, s6 +; RV64-NEXT: slli a0, t0, 
31 +; RV64-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v18, v8, s4 +; RV64-NEXT: slli a0, t0, 32 +; RV64-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v19, v8, t4 +; RV64-NEXT: slli a0, t0, 33 +; RV64-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v20, v8, t1 +; RV64-NEXT: slli a0, t0, 34 +; RV64-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v21, v8, a1 +; RV64-NEXT: slli a0, t0, 35 +; RV64-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v22, v8, a7 +; RV64-NEXT: slli a0, t0, 36 +; RV64-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v23, v8, a6 +; RV64-NEXT: slli a0, t0, 37 +; RV64-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v24, v8, a5 +; RV64-NEXT: slli a0, t0, 38 +; RV64-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v25, v8, a4 +; RV64-NEXT: slli a0, t0, 39 +; RV64-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v26, v8, a3 +; RV64-NEXT: slli a0, t0, 40 +; RV64-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v27, v8, a2 +; RV64-NEXT: slli a0, t0, 41 +; RV64-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a0, 2048 +; RV64-NEXT: vand.vx v28, v8, a0 +; RV64-NEXT: slli s5, t0, 42 +; RV64-NEXT: vand.vx v29, v8, t2 +; RV64-NEXT: slli s6, t0, 43 +; RV64-NEXT: vand.vx v30, v8, s9 +; RV64-NEXT: slli s7, t0, 44 +; RV64-NEXT: vand.vx v31, v8, s8 +; RV64-NEXT: slli s8, t0, 45 +; RV64-NEXT: vand.vx v7, v8, s10 +; RV64-NEXT: slli s9, t0, 46 +; RV64-NEXT: vand.vx v6, v8, s11 +; RV64-NEXT: slli s10, t0, 47 +; RV64-NEXT: vand.vx v5, v8, t5 +; RV64-NEXT: slli s11, t0, 48 +; RV64-NEXT: vand.vx v0, v8, t3 +; RV64-NEXT: slli ra, t0, 49 +; RV64-NEXT: slli t5, t0, 50 +; RV64-NEXT: slli t4, t0, 51 +; RV64-NEXT: slli t3, t0, 52 +; RV64-NEXT: slli t2, t0, 53 +; RV64-NEXT: slli t1, t0, 54 +; RV64-NEXT: slli a7, t0, 55 +; RV64-NEXT: slli a6, t0, 56 +; RV64-NEXT: slli a5, t0, 57 +; RV64-NEXT: slli a4, t0, 58 +; RV64-NEXT: slli a3, t0, 59 +; RV64-NEXT: slli a2, t0, 60 +; RV64-NEXT: slli a1, t0, 61 +; RV64-NEXT: slli t0, t0, 62 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vand.vi v4, v8, 2 +; RV64-NEXT: vand.vi v3, v8, 1 +; RV64-NEXT: vand.vi v2, v8, 4 +; RV64-NEXT: vand.vi v1, v8, 8 +; RV64-NEXT: vmul.vv v4, v8, v4 +; RV64-NEXT: sd t6, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 5 +; RV64-NEXT: add t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v4, v8, v3 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 5 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v4, v8, v2 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 5 +; RV64-NEXT: sub t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v4, v8, v1 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; 
RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v10 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v11 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v12 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v13 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v14 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v15 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v16 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v17 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v18 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v19 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, t6, 
s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v20 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v21 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 4 +; RV64-NEXT: add t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v22 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v23 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 4 +; RV64-NEXT: sub t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v24 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v25 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v26 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v27 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v28 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v29 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 3 +; RV64-NEXT: add t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v30 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v31 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 3 +; RV64-NEXT: sub t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v7 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; 
RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v6 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 2 +; RV64-NEXT: add t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v5 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v0 +; RV64-NEXT: csrr s4, vlenb +; RV64-NEXT: slli t6, s4, 1 +; RV64-NEXT: add s4, t6, s4 +; RV64-NEXT: ld t6, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s4, sp, s4 +; RV64-NEXT: addi s4, s4, 112 +; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr s4, vlenb +; RV64-NEXT: slli s4, s4, 1 +; RV64-NEXT: add s4, sp, s4 +; RV64-NEXT: addi s4, s4, 112 +; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s4, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr s4, vlenb +; RV64-NEXT: add s4, sp, s4 +; RV64-NEXT: addi s4, s4, 112 +; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s4, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: addi s4, sp, 112 +; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s4, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v3, v8, v9 +; RV64-NEXT: ld s4, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v4, v8, v9 +; RV64-NEXT: ld s4, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v5, v8, v9 +; RV64-NEXT: ld s4, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v6, v8, v9 +; RV64-NEXT: ld s4, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v7, v8, v9 +; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v31, v8, v9 +; RV64-NEXT: ld s4, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v30, v8, v9 +; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v29, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s5 +; RV64-NEXT: vmul.vv v28, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s6 +; RV64-NEXT: vmul.vv v27, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s7 +; RV64-NEXT: vmul.vv v26, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s8 +; RV64-NEXT: vmul.vv v25, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s9 +; RV64-NEXT: vmul.vv v24, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s10 +; RV64-NEXT: vmul.vv v23, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s11 +; RV64-NEXT: vmul.vv v22, v8, v9 +; RV64-NEXT: vand.vx v9, v8, ra +; RV64-NEXT: vmul.vv v21, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t5 +; RV64-NEXT: vmul.vv v20, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t4 +; RV64-NEXT: vmul.vv v19, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t3 +; RV64-NEXT: vmul.vv v18, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t2 +; RV64-NEXT: vmul.vv v17, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t1 +; RV64-NEXT: vmul.vv v16, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a7 +; RV64-NEXT: vmul.vv v15, v8, v9 +; 
RV64-NEXT: vand.vx v9, v8, a6 +; RV64-NEXT: vmul.vv v14, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a5 +; RV64-NEXT: vmul.vv v13, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a4 +; RV64-NEXT: vmul.vv v12, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a3 +; RV64-NEXT: vmul.vv v11, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a2 +; RV64-NEXT: vmul.vv v10, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: vand.vx v0, v8, t0 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vand.vx v1, v8, a0 +; RV64-NEXT: vmul.vv v8, v8, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 5 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v2, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 5 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: 
add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; 
RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: addi a0, sp, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v1, v2 +; RV64-NEXT: vxor.vv v3, v2, v3 +; RV64-NEXT: vxor.vv v4, v3, v4 +; RV64-NEXT: vxor.vv v5, v4, v5 +; RV64-NEXT: vxor.vv v6, v5, v6 +; RV64-NEXT: vxor.vv v7, v6, v7 +; RV64-NEXT: vxor.vv v31, v7, v31 +; RV64-NEXT: vxor.vv v30, v31, v30 +; RV64-NEXT: vxor.vv v29, v30, v29 +; RV64-NEXT: vxor.vv v28, v29, v28 +; RV64-NEXT: vxor.vv v27, v28, v27 +; RV64-NEXT: vxor.vv v26, v27, v26 +; RV64-NEXT: vxor.vv v25, v26, v25 +; RV64-NEXT: vxor.vv v24, v25, v24 +; RV64-NEXT: vxor.vv v23, v24, v23 
+; RV64-NEXT: vxor.vv v22, v23, v22 +; RV64-NEXT: vxor.vv v21, v22, v21 +; RV64-NEXT: vxor.vv v20, v21, v20 +; RV64-NEXT: vxor.vv v19, v20, v19 +; RV64-NEXT: vxor.vv v18, v19, v18 +; RV64-NEXT: vxor.vv v17, v18, v17 +; RV64-NEXT: vxor.vv v16, v17, v16 +; RV64-NEXT: vxor.vv v15, v16, v15 +; RV64-NEXT: vxor.vv v14, v15, v14 +; RV64-NEXT: vxor.vv v13, v14, v13 +; RV64-NEXT: vxor.vv v12, v13, v12 +; RV64-NEXT: vxor.vv v11, v12, v11 +; RV64-NEXT: vxor.vv v10, v11, v10 +; RV64-NEXT: vxor.vv v9, v10, v9 +; RV64-NEXT: vxor.vv v9, v9, v0 +; RV64-NEXT: vxor.vv v8, v9, v8 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v9, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v10, v8, a1 +; RV64-NEXT: vsrl.vi v11, v8, 24 +; RV64-NEXT: vsrl.vi v12, v8, 8 +; RV64-NEXT: vand.vx v10, v10, s0 +; RV64-NEXT: vor.vv v9, v10, v9 +; RV64-NEXT: vand.vx v10, v8, t6 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v11, v11, a2 +; RV64-NEXT: vand.vx v12, v12, t6 +; RV64-NEXT: vor.vv v11, v12, v11 +; RV64-NEXT: vand.vx v12, v8, a2 +; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v12, v12, 24 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vsll.vx v12, v8, a0 +; RV64-NEXT: vand.vx v8, v8, s0 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vor.vv v9, v11, v9 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: vand.vx v8, v8, s1 +; RV64-NEXT: vand.vx v9, v9, s1 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: vand.vx v8, v8, s2 +; RV64-NEXT: vand.vx v9, v9, s2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vand.vx v8, v8, s3 +; RV64-NEXT: vand.vx v9, v9, s3 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 224 +; RV64-NEXT: ret + %a = call <1 x i64> @llvm.clmulr.v1i64(<1 x i64> %x, <1 x i64> %y) + ret <1 x i64> %a +} + +define <2 x i64> @clmulr_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { +; RV32-LABEL: clmulr_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: 
sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui s7, 1044480 +; RV32-NEXT: lui a7, 524288 +; RV32-NEXT: li s11, 1 +; RV32-NEXT: li s8, 2 +; RV32-NEXT: li s9, 4 +; RV32-NEXT: li s10, 8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: li a5, 64 +; RV32-NEXT: li a6, 128 +; RV32-NEXT: li ra, 256 +; RV32-NEXT: li a0, 512 +; RV32-NEXT: li a1, 1024 +; RV32-NEXT: lui a2, 1 +; RV32-NEXT: lui t0, 2 +; RV32-NEXT: lui t1, 4 +; RV32-NEXT: lui t2, 8 +; RV32-NEXT: lui t3, 16 +; RV32-NEXT: lui t4, 32 +; RV32-NEXT: lui t5, 64 +; RV32-NEXT: lui t6, 128 +; RV32-NEXT: lui s0, 256 +; RV32-NEXT: lui s1, 512 +; RV32-NEXT: lui s2, 1024 +; RV32-NEXT: lui s3, 2048 +; RV32-NEXT: lui s4, 4096 +; RV32-NEXT: lui s5, 8192 +; RV32-NEXT: lui s6, 16384 +; RV32-NEXT: sw s7, 272(sp) +; RV32-NEXT: lui s7, 32768 +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw a7, 264(sp) +; RV32-NEXT: sw zero, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw s11, 260(sp) +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw s8, 252(sp) +; RV32-NEXT: lui s8, 65536 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s9, 244(sp) +; RV32-NEXT: lui s9, 131072 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw s10, 236(sp) +; RV32-NEXT: lui s10, 262144 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw a3, 228(sp) +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw a4, 220(sp) +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw a5, 212(sp) +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw a6, 204(sp) +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw ra, 196(sp) +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw a0, 188(sp) +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a1, 180(sp) +; RV32-NEXT: slli s11, s11, 11 +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw s11, 172(sp) +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw a2, 164(sp) +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw t0, 156(sp) +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw t1, 148(sp) +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw t2, 140(sp) +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw t3, 132(sp) +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t4, 124(sp) +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t5, 116(sp) +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t6, 108(sp) +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw s0, 100(sp) +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw s1, 92(sp) +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw s2, 84(sp) +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw s3, 76(sp) +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw s4, 68(sp) +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw s5, 60(sp) +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw s6, 52(sp) +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw s7, 44(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw s8, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: sw s9, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw s10, 20(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw a7, 12(sp) +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v3, a0 +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vmv.v.x v2, a0 +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vmv.v.x v1, a0 +; RV32-NEXT: 
addi a0, sp, 272 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v0, (a0), zero +; RV32-NEXT: addi a0, sp, 264 +; RV32-NEXT: vlse64.v v13, (a0), zero +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vlse64.v v14, (a0), zero +; RV32-NEXT: addi a0, sp, 248 +; RV32-NEXT: vlse64.v v15, (a0), zero +; RV32-NEXT: addi a0, sp, 240 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: addi a0, sp, 232 +; RV32-NEXT: vlse64.v v17, (a0), zero +; RV32-NEXT: addi a0, sp, 224 +; RV32-NEXT: vlse64.v v18, (a0), zero +; RV32-NEXT: addi a0, sp, 216 +; RV32-NEXT: vlse64.v v19, (a0), zero +; RV32-NEXT: addi a0, sp, 208 +; RV32-NEXT: vlse64.v v20, (a0), zero +; RV32-NEXT: addi a0, sp, 200 +; RV32-NEXT: vlse64.v v21, (a0), zero +; RV32-NEXT: addi a0, sp, 192 +; RV32-NEXT: vlse64.v v22, (a0), zero +; RV32-NEXT: addi a0, sp, 184 +; RV32-NEXT: vlse64.v v23, (a0), zero +; RV32-NEXT: addi a0, sp, 176 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: addi a0, sp, 168 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: addi a0, sp, 160 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: addi a0, sp, 152 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: addi a0, sp, 144 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: addi a0, sp, 136 +; RV32-NEXT: vlse64.v v29, (a0), zero +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vlse64.v v30, (a0), zero +; RV32-NEXT: addi a0, sp, 120 +; RV32-NEXT: vlse64.v v31, (a0), zero +; RV32-NEXT: addi a0, sp, 112 +; RV32-NEXT: vlse64.v v11, (a0), zero +; RV32-NEXT: addi a0, sp, 104 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: addi a0, sp, 96 +; RV32-NEXT: vlse64.v v5, (a0), zero +; RV32-NEXT: addi a0, sp, 88 +; RV32-NEXT: vlse64.v v4, (a0), zero +; RV32-NEXT: li a6, 56 +; RV32-NEXT: vsrl.vi v27, v8, 24 +; RV32-NEXT: vsrl.vx v28, v8, a6 +; RV32-NEXT: li ra, 40 +; RV32-NEXT: vsrl.vx v7, v8, ra +; RV32-NEXT: vsll.vx v6, v8, a6 +; RV32-NEXT: addi a4, t3, -256 +; RV32-NEXT: vand.vx v7, v7, a4 +; RV32-NEXT: vor.vv v28, v7, v28 +; RV32-NEXT: vand.vx v7, v8, a4 +; RV32-NEXT: vsll.vx v7, v7, ra +; RV32-NEXT: vor.vv v7, v6, v7 +; RV32-NEXT: vsrl.vi v6, v8, 8 +; RV32-NEXT: lui a5, 4080 +; RV32-NEXT: vand.vx v27, v27, a5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v6, v6, v0 +; RV32-NEXT: vor.vv v27, v6, v27 +; RV32-NEXT: addi a3, sp, 80 +; RV32-NEXT: vlse64.v v6, (a3), zero +; RV32-NEXT: vor.vv v27, v27, v28 +; RV32-NEXT: vand.vx v28, v8, a5 +; RV32-NEXT: vsll.vi v28, v28, 24 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v28, v8 +; RV32-NEXT: addi a3, sp, 72 +; RV32-NEXT: vlse64.v v28, (a3), zero +; RV32-NEXT: vor.vv v8, v7, v8 +; RV32-NEXT: addi a3, sp, 64 +; RV32-NEXT: vlse64.v v7, (a3), zero +; RV32-NEXT: vor.vv v8, v8, v27 +; RV32-NEXT: vsrl.vi v27, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v3 +; RV32-NEXT: vand.vv v27, v27, v3 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v27, v8 +; RV32-NEXT: vsrl.vi v27, v8, 2 +; RV32-NEXT: csrr a0, 
vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v2 +; RV32-NEXT: vand.vv v27, v27, v2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v27, v8 +; RV32-NEXT: vsrl.vi v27, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v1 +; RV32-NEXT: vand.vv v27, v27, v1 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v27, v8 +; RV32-NEXT: addi a3, sp, 56 +; RV32-NEXT: vlse64.v v27, (a3), zero +; RV32-NEXT: vand.vv v13, v8, v13 +; RV32-NEXT: vand.vv v14, v8, v14 +; RV32-NEXT: vand.vv v15, v8, v15 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vv v17, v8, v17 +; RV32-NEXT: vand.vv v18, v8, v18 +; RV32-NEXT: vand.vv v19, v8, v19 +; RV32-NEXT: vand.vv v20, v8, v20 +; RV32-NEXT: vand.vv v21, v8, v21 +; RV32-NEXT: vand.vv v22, v8, v22 +; RV32-NEXT: vand.vv v23, v8, v23 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vand.vv v25, v8, v25 +; RV32-NEXT: vand.vv v26, v8, v26 +; RV32-NEXT: vand.vv v3, v8, v9 +; RV32-NEXT: vand.vv v2, v8, v10 +; RV32-NEXT: vand.vv v29, v8, v29 +; RV32-NEXT: vand.vv v30, v8, v30 +; RV32-NEXT: vand.vv v31, v8, v31 +; RV32-NEXT: vand.vv v0, v8, v11 +; RV32-NEXT: vand.vv v9, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v5, v8, v5 +; RV32-NEXT: vand.vv v4, v8, v4 +; RV32-NEXT: vand.vv v6, v8, v6 +; RV32-NEXT: vand.vv v9, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: addi a0, sp, 40 +; RV32-NEXT: vlse64.v v9, (a3), zero +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vand.vv v11, v8, v7 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v11, v8, v27 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: 
vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a2), zero +; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: vlse64.v v11, (a1), zero +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vand.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v11 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vv v9, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vi v9, v8, 8 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 16 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 64 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 128 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 256 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 512 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: li a0, 1024 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s11 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: vand.vx v9, v8, a0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: 
slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t3 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t5 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, t6 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s0 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s1 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s2 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s3 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s4 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s5 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill 
+; RV32-NEXT: vand.vx v9, v8, s6 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s7 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s8 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v9, v8, s9 +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vand.vx v1, v8, s10 +; RV32-NEXT: vmul.vv v1, v8, v1 +; RV32-NEXT: vmul.vv v9, v8, v13 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: vmul.vv v10, v8, v14 +; RV32-NEXT: vmul.vv v11, v8, v15 +; RV32-NEXT: vmul.vv v12, v8, v16 +; RV32-NEXT: vmul.vv v13, v8, v17 +; RV32-NEXT: vmul.vv v14, v8, v18 +; RV32-NEXT: vmul.vv v15, v8, v19 +; RV32-NEXT: vmul.vv v16, v8, v20 +; RV32-NEXT: vmul.vv v17, v8, v21 +; RV32-NEXT: vmul.vv v18, v8, v22 +; RV32-NEXT: vmul.vv v19, v8, v23 +; RV32-NEXT: vmul.vv v20, v8, v24 +; RV32-NEXT: vmul.vv v21, v8, v25 +; RV32-NEXT: vmul.vv v22, v8, v26 +; RV32-NEXT: vmul.vv v23, v8, v3 +; RV32-NEXT: vmul.vv v24, v8, v2 +; RV32-NEXT: vmul.vv v25, v8, v29 +; RV32-NEXT: vmul.vv v26, v8, v30 +; RV32-NEXT: vmul.vv v27, v8, v31 +; RV32-NEXT: vmul.vv v28, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v29, v8, v29 +; RV32-NEXT: vmul.vv v30, v8, v5 +; RV32-NEXT: vmul.vv v31, v8, v4 +; RV32-NEXT: vmul.vv v7, v8, v6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v5, v8, v5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload 
+; RV32-NEXT: vmul.vv v3, v8, v3 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v9, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vi v8, v8, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli 
a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 
288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, 
(a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vxor.vv v8, v8, v1 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vxor.vv v8, v8, v11 +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v8, v13 +; RV32-NEXT: vxor.vv v8, v8, v14 +; RV32-NEXT: vxor.vv v8, v8, v15 +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v8, v17 +; RV32-NEXT: vxor.vv v8, v8, v18 +; RV32-NEXT: vxor.vv v8, v8, v19 +; RV32-NEXT: vxor.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v8, v21 +; RV32-NEXT: vxor.vv v8, v8, v22 +; RV32-NEXT: vxor.vv v8, v8, v23 +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vxor.vv v8, v8, v25 +; RV32-NEXT: vxor.vv v8, v8, v26 +; RV32-NEXT: vxor.vv v8, v8, v27 +; RV32-NEXT: vxor.vv v8, v8, v28 +; RV32-NEXT: vxor.vv v8, v8, v29 +; RV32-NEXT: vxor.vv v8, v8, v30 +; RV32-NEXT: vxor.vv v8, v8, v31 +; RV32-NEXT: vxor.vv v8, v8, v7 +; RV32-NEXT: vxor.vv v8, v8, v6 +; RV32-NEXT: vxor.vv v8, v8, v5 +; RV32-NEXT: vxor.vv v8, v8, v4 +; RV32-NEXT: vxor.vv v8, v8, v3 +; RV32-NEXT: vxor.vv v8, v8, v2 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vx v9, v8, a6 +; RV32-NEXT: vsll.vx v10, v8, a6 +; RV32-NEXT: vsrl.vx v11, v8, ra +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vand.vx v11, v11, a4 +; RV32-NEXT: vsrl.vi v13, v8, 24 +; RV32-NEXT: vand.vx v14, v8, a5 +; RV32-NEXT: vand.vx v13, v13, a5 +; RV32-NEXT: vsll.vx v12, v12, ra +; RV32-NEXT: vsrl.vi v15, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v15, v15, v16 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vor.vv v11, v15, v13 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vsll.vi v13, v14, 24 +; RV32-NEXT: vor.vv v8, v13, v8 +; RV32-NEXT: 
vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vsrl.vi v9, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsrl.vi v9, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -224 +; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: li s11, 56 +; RV64-NEXT: li ra, 40 +; RV64-NEXT: lui a0, 16 +; 
RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsrl.vi v10, v8, 24 +; RV64-NEXT: vsrl.vi v9, v8, 8 +; RV64-NEXT: li t2, 255 +; RV64-NEXT: lui t6, 61681 +; RV64-NEXT: lui s0, 209715 +; RV64-NEXT: lui s1, 349525 +; RV64-NEXT: li s10, 16 +; RV64-NEXT: li s9, 32 +; RV64-NEXT: li s8, 64 +; RV64-NEXT: li s7, 128 +; RV64-NEXT: li s5, 256 +; RV64-NEXT: li t5, 512 +; RV64-NEXT: li t3, 1024 +; RV64-NEXT: li t0, 1 +; RV64-NEXT: lui s6, 1 +; RV64-NEXT: lui s4, 2 +; RV64-NEXT: lui t4, 4 +; RV64-NEXT: lui t1, 8 +; RV64-NEXT: lui a7, 32 +; RV64-NEXT: lui a6, 64 +; RV64-NEXT: lui a5, 128 +; RV64-NEXT: lui a4, 256 +; RV64-NEXT: lui a3, 512 +; RV64-NEXT: lui a2, 1024 +; RV64-NEXT: vsrl.vx v11, v8, s11 +; RV64-NEXT: vsrl.vx v12, v8, ra +; RV64-NEXT: addi t6, t6, -241 +; RV64-NEXT: addi s2, s0, 819 +; RV64-NEXT: addi s3, s1, 1365 +; RV64-NEXT: slli s1, t6, 32 +; RV64-NEXT: add s1, t6, s1 +; RV64-NEXT: slli t6, s2, 32 +; RV64-NEXT: add s2, s2, t6 +; RV64-NEXT: slli t6, s3, 32 +; RV64-NEXT: add s3, s3, t6 +; RV64-NEXT: addi s0, a0, -256 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: lui a0, 4080 +; RV64-NEXT: vand.vx v10, v10, a0 +; RV64-NEXT: slli t6, t2, 24 +; RV64-NEXT: vand.vx v13, v8, a0 +; RV64-NEXT: vsll.vx v14, v8, s11 +; RV64-NEXT: vand.vx v12, v12, s0 +; RV64-NEXT: vand.vx v9, v9, t6 +; RV64-NEXT: vsll.vi v13, v13, 24 +; RV64-NEXT: vand.vx v15, v8, t6 +; RV64-NEXT: vand.vx v8, v8, s0 +; RV64-NEXT: vor.vv v11, v12, v11 +; RV64-NEXT: vor.vv v9, v9, v10 +; RV64-NEXT: vsll.vi v10, v15, 8 +; RV64-NEXT: vsll.vx v8, v8, ra +; RV64-NEXT: vor.vv v9, v9, v11 +; RV64-NEXT: vor.vv v10, v13, v10 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: vand.vx v8, v8, s1 +; RV64-NEXT: vand.vx v9, v9, s1 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: vand.vx v8, v8, s2 +; RV64-NEXT: vand.vx v9, v9, s2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vand.vx v8, v8, s3 +; RV64-NEXT: vand.vx v9, v9, s3 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vand.vx v9, v8, s10 +; RV64-NEXT: lui t2, 4096 +; RV64-NEXT: vand.vx v10, v8, s9 +; RV64-NEXT: lui s9, 8192 +; RV64-NEXT: vand.vx v11, v8, s8 +; RV64-NEXT: lui s8, 16384 +; RV64-NEXT: vand.vx v12, v8, s7 +; RV64-NEXT: lui s10, 32768 +; RV64-NEXT: vand.vx v13, v8, s5 +; RV64-NEXT: lui s11, 65536 +; RV64-NEXT: vand.vx v14, v8, t5 +; RV64-NEXT: lui t5, 131072 +; RV64-NEXT: vand.vx v15, v8, t3 +; RV64-NEXT: slli t3, t0, 11 +; RV64-NEXT: vand.vx v16, v8, t3 +; RV64-NEXT: lui t3, 262144 +; RV64-NEXT: vand.vx v17, v8, s6 +; RV64-NEXT: slli a0, t0, 31 +; RV64-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v18, v8, s4 +; RV64-NEXT: slli a0, t0, 32 +; RV64-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v19, v8, t4 +; RV64-NEXT: slli a0, t0, 33 +; RV64-NEXT: sd a0, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v20, v8, t1 +; RV64-NEXT: slli a0, t0, 34 +; RV64-NEXT: sd a0, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v21, v8, a1 +; RV64-NEXT: slli a0, t0, 35 +; RV64-NEXT: sd a0, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v22, v8, a7 +; RV64-NEXT: slli a0, t0, 36 +; RV64-NEXT: sd a0, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v23, v8, a6 +; RV64-NEXT: slli a0, t0, 37 +; RV64-NEXT: sd a0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v24, v8, a5 +; RV64-NEXT: slli a0, t0, 38 +; 
RV64-NEXT: sd a0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v25, v8, a4 +; RV64-NEXT: slli a0, t0, 39 +; RV64-NEXT: sd a0, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v26, v8, a3 +; RV64-NEXT: slli a0, t0, 40 +; RV64-NEXT: sd a0, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v27, v8, a2 +; RV64-NEXT: slli a0, t0, 41 +; RV64-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a0, 2048 +; RV64-NEXT: vand.vx v28, v8, a0 +; RV64-NEXT: slli s5, t0, 42 +; RV64-NEXT: vand.vx v29, v8, t2 +; RV64-NEXT: slli s6, t0, 43 +; RV64-NEXT: vand.vx v30, v8, s9 +; RV64-NEXT: slli s7, t0, 44 +; RV64-NEXT: vand.vx v31, v8, s8 +; RV64-NEXT: slli s8, t0, 45 +; RV64-NEXT: vand.vx v7, v8, s10 +; RV64-NEXT: slli s9, t0, 46 +; RV64-NEXT: vand.vx v6, v8, s11 +; RV64-NEXT: slli s10, t0, 47 +; RV64-NEXT: vand.vx v5, v8, t5 +; RV64-NEXT: slli s11, t0, 48 +; RV64-NEXT: vand.vx v0, v8, t3 +; RV64-NEXT: slli ra, t0, 49 +; RV64-NEXT: slli t5, t0, 50 +; RV64-NEXT: slli t4, t0, 51 +; RV64-NEXT: slli t3, t0, 52 +; RV64-NEXT: slli t2, t0, 53 +; RV64-NEXT: slli t1, t0, 54 +; RV64-NEXT: slli a7, t0, 55 +; RV64-NEXT: slli a6, t0, 56 +; RV64-NEXT: slli a5, t0, 57 +; RV64-NEXT: slli a4, t0, 58 +; RV64-NEXT: slli a3, t0, 59 +; RV64-NEXT: slli a2, t0, 60 +; RV64-NEXT: slli a1, t0, 61 +; RV64-NEXT: slli t0, t0, 62 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vand.vi v4, v8, 2 +; RV64-NEXT: vand.vi v3, v8, 1 +; RV64-NEXT: vand.vi v2, v8, 4 +; RV64-NEXT: vand.vi v1, v8, 8 +; RV64-NEXT: vmul.vv v4, v8, v4 +; RV64-NEXT: sd t6, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 5 +; RV64-NEXT: add t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v4, v8, v3 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 5 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v4, v8, v2 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 5 +; RV64-NEXT: sub t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v4, v8, v1 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v10 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v11 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, 
t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v12 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v13 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v14 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v15 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v16 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v17 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v18 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v19 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v20 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v21 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 4 +; RV64-NEXT: add t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v22 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 
+; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v23 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 4 +; RV64-NEXT: sub t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v24 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v25 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v26 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v27 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add s4, s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v28 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v29 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 3 +; RV64-NEXT: add t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v30 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v31 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 3 +; RV64-NEXT: sub t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v7 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: mv s4, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s4 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v6 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli s4, t6, 2 +; RV64-NEXT: add t6, s4, t6 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v5 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 2 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 112 +; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill +; RV64-NEXT: vmul.vv v9, v8, v0 +; RV64-NEXT: csrr s4, vlenb +; RV64-NEXT: slli t6, s4, 1 +; RV64-NEXT: add s4, t6, s4 +; RV64-NEXT: ld t6, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s4, sp, s4 +; RV64-NEXT: 
addi s4, s4, 112 +; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr s4, vlenb +; RV64-NEXT: slli s4, s4, 1 +; RV64-NEXT: add s4, sp, s4 +; RV64-NEXT: addi s4, s4, 112 +; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s4, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: csrr s4, vlenb +; RV64-NEXT: add s4, sp, s4 +; RV64-NEXT: addi s4, s4, 112 +; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s4, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: addi s4, sp, 112 +; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill +; RV64-NEXT: ld s4, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v3, v8, v9 +; RV64-NEXT: ld s4, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v4, v8, v9 +; RV64-NEXT: ld s4, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v5, v8, v9 +; RV64-NEXT: ld s4, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v6, v8, v9 +; RV64-NEXT: ld s4, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v7, v8, v9 +; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v31, v8, v9 +; RV64-NEXT: ld s4, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v30, v8, v9 +; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v9, v8, s4 +; RV64-NEXT: vmul.vv v29, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s5 +; RV64-NEXT: vmul.vv v28, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s6 +; RV64-NEXT: vmul.vv v27, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s7 +; RV64-NEXT: vmul.vv v26, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s8 +; RV64-NEXT: vmul.vv v25, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s9 +; RV64-NEXT: vmul.vv v24, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s10 +; RV64-NEXT: vmul.vv v23, v8, v9 +; RV64-NEXT: vand.vx v9, v8, s11 +; RV64-NEXT: vmul.vv v22, v8, v9 +; RV64-NEXT: vand.vx v9, v8, ra +; RV64-NEXT: vmul.vv v21, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t5 +; RV64-NEXT: vmul.vv v20, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t4 +; RV64-NEXT: vmul.vv v19, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t3 +; RV64-NEXT: vmul.vv v18, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t2 +; RV64-NEXT: vmul.vv v17, v8, v9 +; RV64-NEXT: vand.vx v9, v8, t1 +; RV64-NEXT: vmul.vv v16, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a7 +; RV64-NEXT: vmul.vv v15, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a6 +; RV64-NEXT: vmul.vv v14, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a5 +; RV64-NEXT: vmul.vv v13, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a4 +; RV64-NEXT: vmul.vv v12, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a3 +; RV64-NEXT: vmul.vv v11, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a2 +; RV64-NEXT: vmul.vv v10, v8, v9 +; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: vmul.vv v9, v8, v9 +; RV64-NEXT: vand.vx v0, v8, t0 +; RV64-NEXT: vmul.vv v0, v8, v0 +; RV64-NEXT: vand.vx v1, v8, a0 +; RV64-NEXT: vmul.vv v8, v8, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 5 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, 
a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v2, v1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 5 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 
8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; 
RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v1, v1, v2 +; RV64-NEXT: addi a0, sp, 112 +; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v1, v2 +; RV64-NEXT: vxor.vv v3, v2, v3 +; RV64-NEXT: vxor.vv v4, v3, v4 +; RV64-NEXT: vxor.vv v5, v4, v5 +; RV64-NEXT: vxor.vv v6, v5, v6 +; RV64-NEXT: vxor.vv v7, v6, v7 +; RV64-NEXT: vxor.vv v31, v7, v31 +; RV64-NEXT: vxor.vv v30, v31, v30 +; RV64-NEXT: vxor.vv v29, v30, v29 +; RV64-NEXT: vxor.vv v28, v29, v28 +; RV64-NEXT: vxor.vv v27, v28, v27 +; RV64-NEXT: vxor.vv v26, v27, v26 +; RV64-NEXT: vxor.vv v25, v26, v25 +; RV64-NEXT: vxor.vv v24, v25, v24 +; RV64-NEXT: vxor.vv v23, v24, v23 +; RV64-NEXT: vxor.vv v22, v23, v22 +; RV64-NEXT: vxor.vv v21, v22, v21 +; RV64-NEXT: vxor.vv v20, v21, v20 +; RV64-NEXT: vxor.vv v19, v20, v19 +; RV64-NEXT: vxor.vv v18, v19, v18 +; RV64-NEXT: vxor.vv v17, v18, v17 +; RV64-NEXT: vxor.vv v16, v17, v16 +; RV64-NEXT: vxor.vv v15, v16, v15 +; RV64-NEXT: vxor.vv v14, v15, v14 +; RV64-NEXT: vxor.vv v13, v14, v13 +; RV64-NEXT: vxor.vv v12, v13, v12 +; RV64-NEXT: vxor.vv v11, v12, v11 +; RV64-NEXT: vxor.vv v10, v11, v10 +; RV64-NEXT: vxor.vv v9, v10, v9 +; RV64-NEXT: vxor.vv v9, v9, v0 +; RV64-NEXT: vxor.vv v8, v9, v8 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v9, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v10, v8, a1 +; RV64-NEXT: vsrl.vi v11, v8, 24 +; RV64-NEXT: vsrl.vi v12, v8, 8 +; RV64-NEXT: vand.vx v10, v10, s0 +; RV64-NEXT: vor.vv v9, v10, v9 +; 
RV64-NEXT: vand.vx v10, v8, t6 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v11, v11, a2 +; RV64-NEXT: vand.vx v12, v12, t6 +; RV64-NEXT: vor.vv v11, v12, v11 +; RV64-NEXT: vand.vx v12, v8, a2 +; RV64-NEXT: vsll.vi v10, v10, 8 +; RV64-NEXT: vsll.vi v12, v12, 24 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vsll.vx v12, v8, a0 +; RV64-NEXT: vand.vx v8, v8, s0 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vor.vv v9, v11, v9 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vsrl.vi v9, v8, 4 +; RV64-NEXT: vand.vx v8, v8, s1 +; RV64-NEXT: vand.vx v9, v9, s1 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 2 +; RV64-NEXT: vand.vx v8, v8, s2 +; RV64-NEXT: vand.vx v9, v9, s2 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: vsrl.vi v9, v8, 1 +; RV64-NEXT: vand.vx v8, v8, s3 +; RV64-NEXT: vand.vx v9, v9, s3 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v9, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 224 +; RV64-NEXT: ret + %a = call <2 x i64> @llvm.clmulr.v2i64(<2 x i64> %x, <2 x i64> %y) + ret <2 x i64> %a +} + +define <4 x i64> @clmulr_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { +; RV32-LABEL: clmulr_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui s7, 1044480 +; RV32-NEXT: lui a7, 524288 +; RV32-NEXT: li a1, 1 +; RV32-NEXT: li s8, 2 +; RV32-NEXT: li s9, 4 +; RV32-NEXT: li s10, 8 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: li a5, 64 +; RV32-NEXT: li a6, 128 +; RV32-NEXT: li s11, 256 +; RV32-NEXT: li ra, 512 +; RV32-NEXT: li a0, 1024 +; 
RV32-NEXT: lui a2, 1 +; RV32-NEXT: lui t0, 2 +; RV32-NEXT: lui t1, 4 +; RV32-NEXT: lui t2, 8 +; RV32-NEXT: lui t3, 16 +; RV32-NEXT: lui t4, 32 +; RV32-NEXT: lui t5, 64 +; RV32-NEXT: lui t6, 128 +; RV32-NEXT: lui s0, 256 +; RV32-NEXT: lui s1, 512 +; RV32-NEXT: lui s2, 1024 +; RV32-NEXT: lui s3, 2048 +; RV32-NEXT: lui s4, 4096 +; RV32-NEXT: lui s5, 8192 +; RV32-NEXT: lui s6, 16384 +; RV32-NEXT: sw s7, 272(sp) +; RV32-NEXT: lui s7, 32768 +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw a7, 264(sp) +; RV32-NEXT: sw zero, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a1, 260(sp) +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw s8, 252(sp) +; RV32-NEXT: lui s8, 65536 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw s9, 244(sp) +; RV32-NEXT: lui s9, 131072 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw s10, 236(sp) +; RV32-NEXT: lui s10, 262144 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw a3, 228(sp) +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw a4, 220(sp) +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw a5, 212(sp) +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw a6, 204(sp) +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw s11, 196(sp) +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw ra, 188(sp) +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a0, 180(sp) +; RV32-NEXT: slli a5, a1, 11 +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw a5, 172(sp) +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw a2, 164(sp) +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw t0, 156(sp) +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw t1, 148(sp) +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw t2, 140(sp) +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw t3, 132(sp) +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw t4, 124(sp) +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw t5, 116(sp) +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw t6, 108(sp) +; RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw s0, 100(sp) +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw s1, 92(sp) +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw s2, 84(sp) +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw s3, 76(sp) +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw s4, 68(sp) +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw s5, 60(sp) +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw s6, 52(sp) +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw s7, 44(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw s8, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: sw s9, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw s10, 20(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw a7, 12(sp) +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.v.x v4, a0 +; RV32-NEXT: lui a0, 209715 +; RV32-NEXT: addi a0, a0, 819 +; RV32-NEXT: vmv.v.x v2, a0 +; RV32-NEXT: lui a0, 349525 +; RV32-NEXT: addi a0, a0, 1365 +; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: addi a0, sp, 272 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v6, (a0), zero +; RV32-NEXT: addi a0, sp, 264 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: addi a0, sp, 248 +; RV32-NEXT: vlse64.v 
v14, (a0), zero +; RV32-NEXT: addi a0, sp, 240 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: addi a0, sp, 232 +; RV32-NEXT: vlse64.v v18, (a0), zero +; RV32-NEXT: addi a0, sp, 224 +; RV32-NEXT: vlse64.v v20, (a0), zero +; RV32-NEXT: addi a0, sp, 216 +; RV32-NEXT: vlse64.v v22, (a0), zero +; RV32-NEXT: li ra, 56 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsrl.vx v26, v8, ra +; RV32-NEXT: li s11, 40 +; RV32-NEXT: vsrl.vx v28, v8, s11 +; RV32-NEXT: vsll.vx v30, v8, ra +; RV32-NEXT: addi a4, t3, -256 +; RV32-NEXT: vand.vx v28, v28, a4 +; RV32-NEXT: vor.vv v26, v28, v26 +; RV32-NEXT: vand.vx v28, v8, a4 +; RV32-NEXT: vsll.vx v28, v28, s11 +; RV32-NEXT: vor.vv v30, v30, v28 +; RV32-NEXT: vsrl.vi v28, v8, 8 +; RV32-NEXT: lui a6, 4080 +; RV32-NEXT: vand.vx v24, v24, a6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v28, v28, v6 +; RV32-NEXT: vor.vv v28, v28, v24 +; RV32-NEXT: addi a3, sp, 208 +; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: vor.vv v10, v28, v26 +; RV32-NEXT: vand.vx v26, v8, a6 +; RV32-NEXT: vsll.vi v26, v26, 24 +; RV32-NEXT: vand.vv v8, v8, v6 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v26, v8 +; RV32-NEXT: addi a3, sp, 200 +; RV32-NEXT: vlse64.v v28, (a3), zero +; RV32-NEXT: vor.vv v8, v30, v8 +; RV32-NEXT: addi a3, sp, 192 +; RV32-NEXT: vlse64.v v26, (a3), zero +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v30, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v4 +; RV32-NEXT: vand.vv v30, v30, v4 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v30, v8 +; RV32-NEXT: vsrl.vi v30, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v2, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v2 +; RV32-NEXT: vand.vv v30, v30, v2 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v30, v8 +; RV32-NEXT: vsrl.vi v30, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v0, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vand.vv v30, v30, v0 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v30, v8 +; RV32-NEXT: addi a3, sp, 184 +; 
RV32-NEXT: vlse64.v v30, (a3), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v6, v8, v10 +; RV32-NEXT: vand.vv v4, v8, v12 +; RV32-NEXT: vand.vv v2, v8, v14 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vand.vv v10, v8, v18 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v28, v8, v28 +; RV32-NEXT: addi a3, sp, 176 +; RV32-NEXT: addi a0, sp, 168 +; RV32-NEXT: vlse64.v v10, (a3), zero +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vand.vv v14, v8, v26 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v14, v8, v30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte 
Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a2, sp, 160 +; RV32-NEXT: addi a3, sp, 152 +; RV32-NEXT: addi a1, sp, 144 +; RV32-NEXT: addi a0, sp, 136 +; RV32-NEXT: vlse64.v v10, (a2), zero +; RV32-NEXT: vlse64.v v12, (a3), zero +; RV32-NEXT: vlse64.v v14, (a1), zero +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: addi a1, sp, 120 +; RV32-NEXT: addi a2, sp, 112 +; RV32-NEXT: addi a3, sp, 104 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v14, (a2), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, 
sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 96 +; RV32-NEXT: addi a1, sp, 88 +; RV32-NEXT: addi a2, sp, 80 +; RV32-NEXT: addi a3, sp, 72 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v14, (a2), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: addi a1, sp, 56 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v14, (a2), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr 
a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vlse64.v v12, (a1), zero +; RV32-NEXT: vlse64.v v14, (a2), zero +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v14 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vv v10, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vi v10, v8, 2 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vi v10, v8, 1 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vi v10, v8, 4 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vi v10, v8, 8 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, 
(a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 16 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 64 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 128 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 256 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 512 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: li a0, 1024 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, a5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: vand.vx v10, v8, a0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi 
a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t1 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t2 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t3 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t4 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, t6 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s0 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s1 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s2 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s3 
+; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s4 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s5 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s6 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s7 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s8 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s9 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vand.vx v10, v8, s10 +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: vmul.vv v12, v8, v6 +; RV32-NEXT: vmul.vv v14, v8, v4 +; RV32-NEXT: vmul.vv v16, v8, v2 +; RV32-NEXT: vmul.vv v18, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v20, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v20, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v22, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v22, v8, v22 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v24, (a0) # vscale x 16-byte Folded Reload 
+; RV32-NEXT: vmul.vv v24, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v26, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v26, v8, v26 +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v30, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v30, v8, v30 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v6, v8, v6 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v2, v8, v2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb 
+; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb 
+; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, 
a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; 
RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v10, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vi v8, v8, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add 
a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 
+; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # 
vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v8, v14 +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v8, v18 +; RV32-NEXT: vxor.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v8, v22 +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vxor.vv v8, v8, v26 +; RV32-NEXT: vxor.vv v8, v8, v28 +; RV32-NEXT: vxor.vv v8, v8, v30 +; RV32-NEXT: vxor.vv v8, v8, v6 +; RV32-NEXT: vxor.vv v8, v8, v4 +; RV32-NEXT: vxor.vv v8, v8, v2 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; 
RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; 
RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vx v10, v8, ra +; RV32-NEXT: vsll.vx v12, v8, ra +; RV32-NEXT: vsrl.vx v14, v8, s11 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vand.vx v14, v14, a4 +; RV32-NEXT: vsrl.vi v18, v8, 24 +; RV32-NEXT: vand.vx v20, v8, a6 +; RV32-NEXT: vand.vx v18, v18, a6 +; RV32-NEXT: vsll.vx v16, v16, s11 +; RV32-NEXT: vsrl.vi v22, v8, 8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v24, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v22, v22, v24 +; RV32-NEXT: vor.vv v10, v14, v10 +; RV32-NEXT: vor.vv v14, v22, v18 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vsll.vi v18, v20, 24 +; RV32-NEXT: vor.vv v8, v18, v8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v14, v10 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsrl.vi v10, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v10, v10, v12 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -224 +; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: li a7, 56 +; RV64-NEXT: li s1, 40 +; RV64-NEXT: lui s3, 16 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vsrl.vi v14, v8, 24 +; RV64-NEXT: lui t6, 4080 +; RV64-NEXT: vsrl.vi v10, v8, 8 +; RV64-NEXT: li s2, 255 +; RV64-NEXT: lui a5, 61681 +; RV64-NEXT: lui a6, 209715 +; RV64-NEXT: lui t5, 349525 +; RV64-NEXT: li t4, 16 +; RV64-NEXT: li t3, 32 +; RV64-NEXT: li t2, 64 +; RV64-NEXT: li t1, 128 +; RV64-NEXT: li t0, 
256 +; RV64-NEXT: li a4, 512 +; RV64-NEXT: li a3, 1024 +; RV64-NEXT: li s0, 1 +; RV64-NEXT: lui a2, 1 +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: lui a0, 4 +; RV64-NEXT: vsrl.vx v12, v8, a7 +; RV64-NEXT: vsrl.vx v18, v8, s1 +; RV64-NEXT: addi s4, s3, -256 +; RV64-NEXT: vand.vx v16, v14, t6 +; RV64-NEXT: slli s2, s2, 24 +; RV64-NEXT: vand.vx v20, v8, t6 +; RV64-NEXT: vsll.vx v14, v8, a7 +; RV64-NEXT: addi a7, a5, -241 +; RV64-NEXT: addi a6, a6, 819 +; RV64-NEXT: addi a5, t5, 1365 +; RV64-NEXT: slli t5, s0, 11 +; RV64-NEXT: slli t6, s0, 31 +; RV64-NEXT: sd t6, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: slli t6, s0, 32 +; RV64-NEXT: sd t6, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: slli t6, s0, 33 +; RV64-NEXT: sd t6, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: slli t6, s0, 34 +; RV64-NEXT: sd t6, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: slli t6, s0, 35 +; RV64-NEXT: sd t6, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: slli t6, s0, 36 +; RV64-NEXT: sd t6, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: slli t6, a7, 32 +; RV64-NEXT: add a7, a7, t6 +; RV64-NEXT: slli t6, a6, 32 +; RV64-NEXT: add a6, a6, t6 +; RV64-NEXT: slli t6, a5, 32 +; RV64-NEXT: add a5, a5, t6 +; RV64-NEXT: slli t6, s0, 37 +; RV64-NEXT: sd t6, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v18, v18, s4 +; RV64-NEXT: vand.vx v10, v10, s2 +; RV64-NEXT: vsll.vi v20, v20, 24 +; RV64-NEXT: vand.vx v22, v8, s2 +; RV64-NEXT: vand.vx v8, v8, s4 +; RV64-NEXT: vor.vv v12, v18, v12 +; RV64-NEXT: vor.vv v10, v10, v16 +; RV64-NEXT: vsll.vi v16, v22, 8 +; RV64-NEXT: vsll.vx v8, v8, s1 +; RV64-NEXT: vor.vv v10, v10, v12 +; RV64-NEXT: vor.vv v12, v20, v16 +; RV64-NEXT: vor.vv v8, v14, v8 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v10, v10, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v10, v10, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v10, v10, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vand.vx v10, v8, t4 +; RV64-NEXT: slli t4, s0, 38 +; RV64-NEXT: sd t4, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t3 +; RV64-NEXT: slli t3, s0, 39 +; RV64-NEXT: sd t3, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v14, v8, t2 +; RV64-NEXT: slli t2, s0, 40 +; RV64-NEXT: sd t2, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v16, v8, t1 +; RV64-NEXT: slli t1, s0, 41 +; RV64-NEXT: sd t1, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: vand.vx v18, v8, t0 +; RV64-NEXT: slli s6, s0, 42 +; RV64-NEXT: vand.vx v20, v8, a4 +; RV64-NEXT: slli s7, s0, 43 +; RV64-NEXT: vand.vx v22, v8, a3 +; RV64-NEXT: slli s8, s0, 44 +; RV64-NEXT: vand.vx v24, v8, t5 +; RV64-NEXT: slli s9, s0, 45 +; RV64-NEXT: vand.vx v26, v8, a2 +; RV64-NEXT: slli s10, s0, 46 +; RV64-NEXT: vand.vx v28, v8, a1 +; RV64-NEXT: slli s11, s0, 47 +; RV64-NEXT: vand.vx v30, v8, a0 +; RV64-NEXT: slli ra, s0, 48 +; RV64-NEXT: slli s3, s0, 49 +; RV64-NEXT: slli s1, s0, 50 +; RV64-NEXT: slli t6, s0, 51 +; RV64-NEXT: slli t5, s0, 52 +; RV64-NEXT: slli t4, s0, 53 +; RV64-NEXT: slli t3, s0, 54 +; RV64-NEXT: slli t2, s0, 55 +; RV64-NEXT: slli t1, s0, 56 +; RV64-NEXT: slli t0, s0, 57 +; RV64-NEXT: slli a4, s0, 58 +; RV64-NEXT: slli a3, s0, 59 +; RV64-NEXT: slli a2, s0, 60 +; RV64-NEXT: slli a1, s0, 61 +; RV64-NEXT: slli s0, s0, 62 +; 
RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vand.vi v6, v8, 2 +; RV64-NEXT: vand.vi v4, v8, 1 +; RV64-NEXT: vand.vi v2, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v6, v8, v6 +; RV64-NEXT: sd a5, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v4 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v2 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v6, v8, v0 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v14 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v18 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, 
a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v20 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v22 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v24 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v26 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v28 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vmul.vv v10, v8, v30 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 8 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 16 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 32 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 64 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; 
RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 128 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 256 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 512 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 1024 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 2048 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 8192 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 16384 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add 
a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 32768 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 65536 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 131072 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: lui s5, 262144 +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; 
RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 112 +; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: mv a5, s5 +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: ld a5, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: ld s5, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v10, v8, s5 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s6 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s7 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s8 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; 
RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s9 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s10 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s11 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, ra +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 112 +; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s3 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: addi s3, sp, 112 +; RV64-NEXT: vs2r.v v10, (s3) # vscale x 16-byte Folded Spill +; RV64-NEXT: vand.vx v10, v8, s1 +; RV64-NEXT: vmul.vv v4, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t6 +; RV64-NEXT: vmul.vv v6, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t5 +; RV64-NEXT: vmul.vv v30, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t4 +; RV64-NEXT: vmul.vv v28, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t3 +; RV64-NEXT: vmul.vv v26, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t2 +; RV64-NEXT: vmul.vv v24, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t1 +; RV64-NEXT: vmul.vv v22, v8, v10 +; RV64-NEXT: vand.vx v10, v8, t0 +; RV64-NEXT: vmul.vv v20, v8, v10 +; RV64-NEXT: vand.vx v10, v8, a4 +; RV64-NEXT: vmul.vv v18, v8, v10 +; RV64-NEXT: vand.vx v10, v8, a3 +; RV64-NEXT: vmul.vv v16, v8, v10 +; RV64-NEXT: vand.vx v10, v8, a2 +; RV64-NEXT: vmul.vv v14, v8, v10 +; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vmul.vv v12, v8, v10 +; RV64-NEXT: vand.vx v10, v8, s0 +; RV64-NEXT: vmul.vv v10, v8, v10 +; RV64-NEXT: vand.vx v0, v8, a0 +; RV64-NEXT: vmul.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v2, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 
+; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; 
RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; 
RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; 
RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; 
RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v2 +; RV64-NEXT: addi a0, sp, 112 +; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload +; RV64-NEXT: vxor.vv v2, v0, v2 +; RV64-NEXT: vxor.vv v4, v2, v4 +; RV64-NEXT: vxor.vv v6, v4, v6 +; RV64-NEXT: vxor.vv v30, v6, v30 +; RV64-NEXT: vxor.vv v28, v30, v28 +; RV64-NEXT: vxor.vv v26, v28, v26 +; RV64-NEXT: vxor.vv v24, v26, v24 +; RV64-NEXT: vxor.vv v22, v24, v22 +; RV64-NEXT: vxor.vv v20, v22, v20 +; RV64-NEXT: vxor.vv v18, v20, v18 +; RV64-NEXT: vxor.vv v16, v18, v16 +; RV64-NEXT: vxor.vv v14, v16, v14 +; RV64-NEXT: vxor.vv v12, v14, v12 +; RV64-NEXT: vxor.vv v10, v12, v10 +; RV64-NEXT: vxor.vv v8, v10, v8 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v10, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v12, v8, a1 +; RV64-NEXT: vsrl.vi v14, v8, 24 +; RV64-NEXT: vsrl.vi v16, v8, 8 +; RV64-NEXT: vand.vx v12, v12, s4 +; RV64-NEXT: vor.vv v10, v12, v10 +; RV64-NEXT: vand.vx v12, v8, s2 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v14, v14, a2 +; RV64-NEXT: vand.vx v16, v16, s2 +; RV64-NEXT: vor.vv v14, v16, v14 +; RV64-NEXT: vand.vx v16, v8, a2 +; RV64-NEXT: vsll.vi v12, v12, 8 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: vor.vv v12, v16, v12 +; RV64-NEXT: vsll.vx v16, v8, a0 +; RV64-NEXT: vand.vx v8, v8, s4 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: vor.vv v10, v14, v10 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsrl.vi v10, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v10, v10, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v10, v10, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: vsrl.vi v10, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v10, v10, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v10, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload +; RV64-NEXT: ld 
s9, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 224 +; RV64-NEXT: ret + %a = call <4 x i64> @llvm.clmulr.v4i64(<4 x i64> %x, <4 x i64> %y) + ret <4 x i64> %a +} + +define <8 x i64> @clmulr_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { +; RV32-LABEL: clmulr_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -352 +; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: lui s11, 1044480 +; RV32-NEXT: lui s0, 524288 +; RV32-NEXT: li a0, 1 +; RV32-NEXT: li ra, 2 +; RV32-NEXT: li t4, 4 +; RV32-NEXT: li t2, 8 +; RV32-NEXT: li t6, 16 +; RV32-NEXT: li t5, 32 +; RV32-NEXT: li t3, 64 +; RV32-NEXT: li t1, 128 +; RV32-NEXT: li t0, 256 +; RV32-NEXT: li a7, 512 +; RV32-NEXT: li a6, 1024 +; RV32-NEXT: lui a4, 1 +; RV32-NEXT: lui a3, 2 +; RV32-NEXT: lui a2, 4 +; RV32-NEXT: lui a5, 8 +; RV32-NEXT: lui s1, 16 +; RV32-NEXT: lui a1, 32 +; RV32-NEXT: lui s2, 64 +; RV32-NEXT: lui s3, 128 +; RV32-NEXT: lui s4, 256 +; RV32-NEXT: lui s5, 512 +; RV32-NEXT: lui s6, 1024 +; RV32-NEXT: lui s7, 2048 +; RV32-NEXT: lui s8, 4096 +; RV32-NEXT: lui s9, 8192 +; RV32-NEXT: lui s10, 16384 +; RV32-NEXT: sw s11, 272(sp) +; RV32-NEXT: lui s11, 32768 +; RV32-NEXT: sw zero, 276(sp) +; RV32-NEXT: sw s0, 264(sp) +; RV32-NEXT: sw zero, 268(sp) +; RV32-NEXT: sw zero, 256(sp) +; RV32-NEXT: sw a0, 260(sp) +; RV32-NEXT: sw zero, 248(sp) +; RV32-NEXT: sw ra, 252(sp) +; RV32-NEXT: lui ra, 65536 +; RV32-NEXT: sw zero, 240(sp) +; RV32-NEXT: sw t4, 244(sp) +; RV32-NEXT: lui t4, 131072 +; RV32-NEXT: sw zero, 232(sp) +; RV32-NEXT: sw t2, 236(sp) +; RV32-NEXT: lui t2, 262144 +; RV32-NEXT: sw zero, 224(sp) +; RV32-NEXT: sw t6, 228(sp) +; RV32-NEXT: sw zero, 216(sp) +; RV32-NEXT: sw t5, 220(sp) +; RV32-NEXT: sw zero, 208(sp) +; RV32-NEXT: sw t3, 212(sp) +; RV32-NEXT: sw zero, 200(sp) +; RV32-NEXT: sw t1, 204(sp) +; RV32-NEXT: sw zero, 192(sp) +; RV32-NEXT: sw t0, 196(sp) +; RV32-NEXT: sw zero, 184(sp) +; RV32-NEXT: sw a7, 188(sp) +; RV32-NEXT: sw zero, 176(sp) +; RV32-NEXT: sw a6, 180(sp) +; RV32-NEXT: li t1, 1024 +; RV32-NEXT: slli t6, a0, 11 +; RV32-NEXT: sw zero, 168(sp) +; RV32-NEXT: sw t6, 172(sp) +; RV32-NEXT: sw zero, 160(sp) +; RV32-NEXT: sw a4, 164(sp) +; RV32-NEXT: sw zero, 152(sp) +; RV32-NEXT: sw a3, 156(sp) +; RV32-NEXT: lui t3, 2 +; RV32-NEXT: sw zero, 144(sp) +; RV32-NEXT: sw a2, 148(sp) +; RV32-NEXT: lui t5, 4 +; RV32-NEXT: sw zero, 136(sp) +; RV32-NEXT: sw a5, 140(sp) +; RV32-NEXT: lui a4, 8 +; RV32-NEXT: sw zero, 128(sp) +; RV32-NEXT: sw s1, 132(sp) +; RV32-NEXT: sw zero, 120(sp) +; RV32-NEXT: sw a1, 124(sp) +; RV32-NEXT: sw zero, 112(sp) +; RV32-NEXT: sw s2, 116(sp) +; RV32-NEXT: sw zero, 104(sp) +; RV32-NEXT: sw s3, 108(sp) +; 
RV32-NEXT: sw zero, 96(sp) +; RV32-NEXT: sw s4, 100(sp) +; RV32-NEXT: sw zero, 88(sp) +; RV32-NEXT: sw s5, 92(sp) +; RV32-NEXT: sw zero, 80(sp) +; RV32-NEXT: sw s6, 84(sp) +; RV32-NEXT: sw zero, 72(sp) +; RV32-NEXT: sw s7, 76(sp) +; RV32-NEXT: sw zero, 64(sp) +; RV32-NEXT: sw s8, 68(sp) +; RV32-NEXT: sw zero, 56(sp) +; RV32-NEXT: sw s9, 60(sp) +; RV32-NEXT: sw zero, 48(sp) +; RV32-NEXT: sw s10, 52(sp) +; RV32-NEXT: sw zero, 40(sp) +; RV32-NEXT: sw s11, 44(sp) +; RV32-NEXT: sw zero, 32(sp) +; RV32-NEXT: sw ra, 36(sp) +; RV32-NEXT: sw zero, 24(sp) +; RV32-NEXT: sw t4, 28(sp) +; RV32-NEXT: sw zero, 16(sp) +; RV32-NEXT: sw t2, 20(sp) +; RV32-NEXT: sw zero, 8(sp) +; RV32-NEXT: sw s0, 12(sp) +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v28, a1 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vmv.v.x v4, a1 +; RV32-NEXT: addi a1, sp, 272 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v0, (a1), zero +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v0, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li t0, 56 +; RV32-NEXT: vsrl.vi v20, v8, 24 +; RV32-NEXT: vsrl.vx v12, v8, t0 +; RV32-NEXT: li a6, 40 +; RV32-NEXT: vsrl.vx v16, v8, a6 +; RV32-NEXT: vsll.vx v24, v8, t0 +; RV32-NEXT: addi a3, s1, -256 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vor.vv v16, v16, v12 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a6 +; RV32-NEXT: vor.vv v12, v24, v12 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: lui a5, 4080 +; RV32-NEXT: vand.vx v20, v20, a5 +; RV32-NEXT: lui a7, 349525 +; RV32-NEXT: addi a7, a7, 1365 +; RV32-NEXT: vand.vv v24, v24, v0 +; RV32-NEXT: vor.vv v20, v24, v20 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmv.v.x v24, a7 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vor.vv v16, v20, v16 +; RV32-NEXT: vand.vx v20, v8, a5 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: addi a7, sp, 264 +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: addi a7, sp, 256 +; RV32-NEXT: vlse64.v v12, (a7), zero +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v28 +; RV32-NEXT: vand.vv v16, v16, v28 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v4 +; RV32-NEXT: vand.vv v16, v16, v4 +; RV32-NEXT: vsll.vi v8, v8, 2 +; 
RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: addi a7, sp, 248 +; RV32-NEXT: vlse64.v v16, (a7), zero +; RV32-NEXT: vand.vv v28, v8, v20 +; RV32-NEXT: addi a7, sp, 240 +; RV32-NEXT: addi a0, sp, 232 +; RV32-NEXT: vlse64.v v20, (a7), zero +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vand.vv v4, v8, v12 +; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a2, sp, 224 +; RV32-NEXT: addi a7, sp, 216 +; RV32-NEXT: addi a1, sp, 208 +; RV32-NEXT: addi a0, sp, 200 +; RV32-NEXT: vlse64.v v12, (a2), zero +; RV32-NEXT: vlse64.v v16, (a7), zero +; RV32-NEXT: vlse64.v v20, (a1), zero +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, 
a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 192 +; RV32-NEXT: addi a1, sp, 184 +; RV32-NEXT: addi a2, sp, 176 +; RV32-NEXT: addi a7, sp, 168 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a2), zero +; RV32-NEXT: vlse64.v v24, (a7), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 160 +; RV32-NEXT: addi a1, sp, 152 +; RV32-NEXT: addi a2, sp, 144 +; RV32-NEXT: addi a7, sp, 136 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a2), zero +; RV32-NEXT: vlse64.v v24, (a7), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv 
a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: addi a1, sp, 120 +; RV32-NEXT: addi a2, sp, 112 +; RV32-NEXT: addi a7, sp, 104 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a2), zero +; RV32-NEXT: vlse64.v v24, (a7), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 96 +; RV32-NEXT: addi a1, sp, 88 +; RV32-NEXT: addi a2, sp, 80 +; RV32-NEXT: addi a7, sp, 72 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a2), zero +; RV32-NEXT: vlse64.v v24, (a7), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, 
v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: addi a1, sp, 56 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: addi a7, sp, 40 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a2), zero +; RV32-NEXT: vlse64.v v24, (a7), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: addi a7, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v20, (a2), zero +; RV32-NEXT: vlse64.v v24, (a7), zero +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v20 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vv v12, v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vi v12, v8, 2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vi v12, v8, 1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vi v12, v8, 4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vi v12, v8, 8 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 16 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 64 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 128 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 256 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: li a0, 512 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s1 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: lui a0, 32 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 
+; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s3 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s5 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s6 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s7 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s8 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s9 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s10 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, s11 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, ra +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) 
# vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t4 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vand.vx v12, v8, t2 +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: vmul.vv v16, v8, v28 +; RV32-NEXT: vmul.vv v20, v8, v4 +; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v28, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v28, v8, v28 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v4, v8, v4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v0, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv 
a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 
32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; 
RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v12, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vs4r.v 
v8, (a0) # vscale x 32-byte Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vi v8, v8, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 
288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; 
RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: addi a0, sp, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vxor.vv v8, v8, v20 +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vxor.vv v8, v8, v28 +; RV32-NEXT: vxor.vv v8, v8, v4 +; RV32-NEXT: vxor.vv v8, v8, v0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add 
a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add 
a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi 
a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vsrl.vx v12, v8, t0 +; RV32-NEXT: vsrl.vx v16, v8, a6 +; RV32-NEXT: vsrl.vi v20, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vx v20, v20, a5 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vand.vx v20, v8, a5 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vor.vv v20, v20, v24 +; RV32-NEXT: vsll.vx v24, v8, t0 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vsll.vx v8, v8, a6 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v8, v8, v20 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsrl.vi v12, v8, 4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a1, 
a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vsll.vi v8, v8, 4 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 2 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vsll.vi v8, v8, 2 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsrl.vi v12, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 288 +; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 352 +; RV32-NEXT: ret +; +; RV64-LABEL: clmulr_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -240 +; RV64-NEXT: sd ra, 232(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 224(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 216(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 208(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 200(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 192(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 184(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s6, 176(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s7, 168(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s8, 160(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s9, 152(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s10, 144(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s11, 136(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; 
RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: li a6, 56 +; RV64-NEXT: li t0, 40 +; RV64-NEXT: lui t1, 16 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vsrl.vi v20, v8, 24 +; RV64-NEXT: lui a7, 4080 +; RV64-NEXT: vsrl.vi v12, v8, 8 +; RV64-NEXT: li s0, 255 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 209715 +; RV64-NEXT: lui a5, 349525 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: li a1, 32 +; RV64-NEXT: li a0, 64 +; RV64-NEXT: li s9, 1 +; RV64-NEXT: vsrl.vx v16, v8, a6 +; RV64-NEXT: vsrl.vx v28, v8, t0 +; RV64-NEXT: addi s4, t1, -256 +; RV64-NEXT: vand.vx v24, v20, a7 +; RV64-NEXT: slli s0, s0, 24 +; RV64-NEXT: vand.vx v4, v8, a7 +; RV64-NEXT: vsll.vx v20, v8, a6 +; RV64-NEXT: addi a7, a3, -241 +; RV64-NEXT: addi a6, a4, 819 +; RV64-NEXT: addi a5, a5, 1365 +; RV64-NEXT: slli a3, s9, 11 +; RV64-NEXT: sd a3, 112(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 31 +; RV64-NEXT: sd a3, 104(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 32 +; RV64-NEXT: sd a3, 96(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 33 +; RV64-NEXT: sd a3, 88(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 34 +; RV64-NEXT: sd a3, 80(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 35 +; RV64-NEXT: sd a3, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 36 +; RV64-NEXT: sd a3, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 37 +; RV64-NEXT: sd a3, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 38 +; RV64-NEXT: sd a3, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 39 +; RV64-NEXT: sd a3, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 40 +; RV64-NEXT: sd a3, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a3, s9, 41 +; RV64-NEXT: sd a3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: slli s6, s9, 42 +; RV64-NEXT: slli s7, s9, 43 +; RV64-NEXT: slli a3, a7, 32 +; RV64-NEXT: add a7, a7, a3 +; RV64-NEXT: slli a3, a6, 32 +; RV64-NEXT: add a6, a6, a3 +; RV64-NEXT: slli a3, a5, 32 +; RV64-NEXT: add a5, a5, a3 +; RV64-NEXT: slli s8, s9, 44 +; RV64-NEXT: vand.vx v28, v28, s4 +; RV64-NEXT: vand.vx v12, v12, s0 +; RV64-NEXT: vsll.vi v4, v4, 24 +; RV64-NEXT: vand.vx v0, v8, s0 +; RV64-NEXT: vand.vx v8, v8, s4 +; RV64-NEXT: vor.vv v16, v28, v16 +; RV64-NEXT: vor.vv v12, v12, v24 +; RV64-NEXT: vsll.vi v24, v0, 8 +; RV64-NEXT: vsll.vx v8, v8, t0 +; RV64-NEXT: vor.vv v12, v12, v16 +; RV64-NEXT: vor.vv v16, v4, v24 +; RV64-NEXT: vor.vv v8, v20, v8 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v12, v12, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v12, v12, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v12, v12, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vand.vx v12, v8, a2 +; RV64-NEXT: slli s10, s9, 45 +; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: slli s11, s9, 46 +; RV64-NEXT: vand.vx v20, v8, a0 +; RV64-NEXT: slli ra, s9, 47 +; RV64-NEXT: slli s3, s9, 48 +; RV64-NEXT: slli s2, s9, 49 +; RV64-NEXT: slli s1, s9, 50 +; RV64-NEXT: slli t6, s9, 51 +; RV64-NEXT: slli t5, s9, 52 +; RV64-NEXT: slli t4, s9, 53 +; RV64-NEXT: slli t3, s9, 54 +; RV64-NEXT: slli t2, s9, 55 +; RV64-NEXT: slli t1, s9, 56 +; RV64-NEXT: slli t0, s9, 57 +; RV64-NEXT: slli a4, s9, 
58 +; RV64-NEXT: slli a3, s9, 59 +; RV64-NEXT: slli a2, s9, 60 +; RV64-NEXT: slli a1, s9, 61 +; RV64-NEXT: slli s9, s9, 62 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vand.vi v24, v8, 2 +; RV64-NEXT: vand.vi v28, v8, 1 +; RV64-NEXT: vand.vi v4, v8, 4 +; RV64-NEXT: vand.vi v0, v8, 8 +; RV64-NEXT: vmul.vv v24, v8, v24 +; RV64-NEXT: sd a5, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v28 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v4 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v24, v8, v0 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vmul.vv v12, v8, v20 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: li s5, 128 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; 
RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: li s5, 256 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: li s5, 512 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 6 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: li s5, 1024 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 112(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 1 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 2 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 4 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 8 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; 
RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 16 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 32 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 64 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 128 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 256 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 512 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 1024 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 2048 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 4096 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: 
add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 8192 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 7 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 16384 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 32768 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 65536 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 131072 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: lui s5, 262144 +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 104(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, 
a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 5 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 4 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 1 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: mv s5, a5 +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, a5, s5 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 128 +; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv a5, s5 +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: add s5, s5, a5 +; RV64-NEXT: ld a5, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; 
RV64-NEXT: vand.vx v12, v8, s5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s6 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s7 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s8 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s10 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 4 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s11 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 1 +; RV64-NEXT: add s6, s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, ra +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s5, vlenb +; RV64-NEXT: slli s5, s5, 3 +; RV64-NEXT: mv s6, s5 +; RV64-NEXT: slli s5, s5, 2 +; RV64-NEXT: add s5, s5, s6 +; RV64-NEXT: add s5, sp, s5 +; RV64-NEXT: addi s5, s5, 128 +; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s3, vlenb +; RV64-NEXT: slli s3, s3, 2 +; RV64-NEXT: mv s5, s3 +; RV64-NEXT: slli s3, s3, 3 +; RV64-NEXT: add s3, s3, s5 +; RV64-NEXT: add s3, sp, s3 +; RV64-NEXT: addi s3, s3, 128 +; RV64-NEXT: vs4r.v v12, (s3) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s2, vlenb +; RV64-NEXT: slli s2, s2, 5 +; RV64-NEXT: add s2, sp, s2 +; RV64-NEXT: addi s2, s2, 128 +; RV64-NEXT: vs4r.v v12, (s2) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, s1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr s1, vlenb +; RV64-NEXT: slli s1, s1, 2 +; RV64-NEXT: mv s2, s1 +; RV64-NEXT: slli s1, s1, 1 +; RV64-NEXT: add s2, s2, s1 +; RV64-NEXT: slli s1, s1, 1 +; RV64-NEXT: add s1, s1, s2 +; RV64-NEXT: add s1, sp, s1 +; RV64-NEXT: addi s1, s1, 128 +; RV64-NEXT: vs4r.v v12, (s1) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t6 +; RV64-NEXT: 
vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t6, vlenb +; RV64-NEXT: slli t6, t6, 3 +; RV64-NEXT: mv s1, t6 +; RV64-NEXT: slli t6, t6, 1 +; RV64-NEXT: add t6, t6, s1 +; RV64-NEXT: add t6, sp, t6 +; RV64-NEXT: addi t6, t6, 128 +; RV64-NEXT: vs4r.v v12, (t6) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t5 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t5, vlenb +; RV64-NEXT: slli t5, t5, 2 +; RV64-NEXT: mv t6, t5 +; RV64-NEXT: slli t5, t5, 2 +; RV64-NEXT: add t5, t5, t6 +; RV64-NEXT: add t5, sp, t5 +; RV64-NEXT: addi t5, t5, 128 +; RV64-NEXT: vs4r.v v12, (t5) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t4 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t4, vlenb +; RV64-NEXT: slli t4, t4, 4 +; RV64-NEXT: add t4, sp, t4 +; RV64-NEXT: addi t4, t4, 128 +; RV64-NEXT: vs4r.v v12, (t4) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t3 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: slli t3, t3, 2 +; RV64-NEXT: mv t4, t3 +; RV64-NEXT: slli t3, t3, 1 +; RV64-NEXT: add t3, t3, t4 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 128 +; RV64-NEXT: vs4r.v v12, (t3) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t2 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t2, vlenb +; RV64-NEXT: slli t2, t2, 3 +; RV64-NEXT: add t2, sp, t2 +; RV64-NEXT: addi t2, t2, 128 +; RV64-NEXT: vs4r.v v12, (t2) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t1 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: csrr t1, vlenb +; RV64-NEXT: slli t1, t1, 2 +; RV64-NEXT: add t1, sp, t1 +; RV64-NEXT: addi t1, t1, 128 +; RV64-NEXT: vs4r.v v12, (t1) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, t0 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: addi t0, sp, 128 +; RV64-NEXT: vs4r.v v12, (t0) # vscale x 32-byte Folded Spill +; RV64-NEXT: vand.vx v12, v8, a4 +; RV64-NEXT: vmul.vv v28, v8, v12 +; RV64-NEXT: vand.vx v12, v8, a3 +; RV64-NEXT: vmul.vv v24, v8, v12 +; RV64-NEXT: vand.vx v12, v8, a2 +; RV64-NEXT: vmul.vv v20, v8, v12 +; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: vmul.vv v16, v8, v12 +; RV64-NEXT: vand.vx v12, v8, s9 +; RV64-NEXT: vmul.vv v12, v8, v12 +; RV64-NEXT: vand.vx v0, v8, a0 +; RV64-NEXT: vmul.vv v8, v8, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v4, v0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, 
a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: 
slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 
+; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 7 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; 
RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 
+; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 5 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) 
# vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v0, v0, v4 +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; RV64-NEXT: vxor.vv v4, v0, v4 +; RV64-NEXT: vxor.vv v28, v4, v28 +; RV64-NEXT: vxor.vv v24, v28, v24 +; RV64-NEXT: vxor.vv v20, v24, v20 +; RV64-NEXT: vxor.vv v16, v20, v16 +; RV64-NEXT: vxor.vv v12, v16, v12 +; RV64-NEXT: vxor.vv v8, v12, v8 +; RV64-NEXT: li a0, 56 +; RV64-NEXT: vsrl.vx v12, v8, a0 +; RV64-NEXT: li a1, 40 +; RV64-NEXT: vsrl.vx v16, v8, a1 +; RV64-NEXT: vsrl.vi v20, v8, 24 +; RV64-NEXT: vsrl.vi v24, v8, 8 +; RV64-NEXT: vand.vx v16, v16, s4 +; RV64-NEXT: vor.vv v12, v16, v12 +; RV64-NEXT: vand.vx v16, v8, s0 +; RV64-NEXT: lui a2, 4080 +; RV64-NEXT: vand.vx v20, v20, a2 +; RV64-NEXT: vand.vx v24, v24, s0 +; RV64-NEXT: vor.vv v20, v24, v20 +; RV64-NEXT: vand.vx v24, v8, a2 +; RV64-NEXT: vsll.vi v16, v16, 8 +; RV64-NEXT: vsll.vi v24, v24, 24 +; RV64-NEXT: vor.vv v16, v24, v16 +; RV64-NEXT: vsll.vx v24, v8, a0 +; RV64-NEXT: vand.vx v8, v8, s4 +; RV64-NEXT: vsll.vx v8, v8, a1 +; RV64-NEXT: vor.vv v8, v24, v8 +; RV64-NEXT: vor.vv v12, v20, v12 +; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vor.vv v8, v8, v12 +; RV64-NEXT: vsrl.vi v12, v8, 4 +; RV64-NEXT: vand.vx v8, v8, a7 +; RV64-NEXT: vand.vx v12, v12, a7 +; RV64-NEXT: vsll.vi v8, v8, 4 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 2 +; RV64-NEXT: vand.vx v8, v8, a6 +; RV64-NEXT: vand.vx v12, v12, a6 +; RV64-NEXT: vsll.vi v8, v8, 2 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: vsrl.vi v12, v8, 1 +; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v12, v12, a5 +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: vor.vv v8, v12, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 232(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 224(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 216(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 208(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 200(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 192(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s5, 184(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s6, 176(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s7, 168(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s8, 160(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s9, 152(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s10, 144(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s11, 136(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 240 +; RV64-NEXT: ret + %a = call <8 x i64> @llvm.clmulr.v8i64(<8 x i64> %x, <8 x i64> %y) + ret <8 x i64> %a +} From 0e5e1c48aedac39dfef9303294cc05adf7fd842c Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 19 Nov 2025 22:11:16 +0000 Subject: [PATCH 02/13] [ISel] Updates to const-fold tests --- llvm/test/CodeGen/RISCV/clmul.ll | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff 
--git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll index 1e3acd8ccce74..f997e56d91178 100644 --- a/llvm/test/CodeGen/RISCV/clmul.ll +++ b/llvm/test/CodeGen/RISCV/clmul.ll @@ -3233,17 +3233,11 @@ define i4 @clmul_constfold_i4() nounwind { } define i16 @clmul_constfold_i16() nounwind { -; RV32IM-LABEL: clmul_constfold_i16: -; RV32IM: # %bb.0: -; RV32IM-NEXT: lui a0, 699051 -; RV32IM-NEXT: addi a0, a0, -1366 -; RV32IM-NEXT: ret -; -; RV64IM-LABEL: clmul_constfold_i16: -; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a0, %hi(.LCPI6_0) -; RV64IM-NEXT: ld a0, %lo(.LCPI6_0)(a0) -; RV64IM-NEXT: ret +; CHECK-LABEL: clmul_constfold_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 11 +; CHECK-NEXT: addi a0, a0, -1366 +; CHECK-NEXT: ret %res = call i16 @llvm.clmul.i16(i16 -2, i16 -1) ret i16 %res } @@ -7566,17 +7560,11 @@ define i4 @clmulr_constfold_i4() nounwind { } define i16 @clmulr_constfold_i16() nounwind { -; RV32IM-LABEL: clmulr_constfold_i16: -; RV32IM: # %bb.0: -; RV32IM-NEXT: lui a0, 699051 -; RV32IM-NEXT: addi a0, a0, -1366 -; RV32IM-NEXT: ret -; -; RV64IM-LABEL: clmulr_constfold_i16: -; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a0, %hi(.LCPI13_0) -; RV64IM-NEXT: ld a0, %lo(.LCPI13_0)(a0) -; RV64IM-NEXT: ret +; CHECK-LABEL: clmulr_constfold_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 11 +; CHECK-NEXT: addi a0, a0, -1365 +; CHECK-NEXT: ret %res = call i16 @llvm.clmulr.i16(i16 -2, i16 -1) ret i16 %res } From 244a3d842ab8ed7b9905cf83eff6aceae5c46d5a Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 09:15:44 +0000 Subject: [PATCH 03/13] [ISel] Strip llvm.clmulr --- llvm/docs/LangRef.rst | 48 - llvm/include/llvm/IR/Intrinsics.td | 2 - .../SelectionDAG/SelectionDAGBuilder.cpp | 18 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 11 +- llvm/test/CodeGen/RISCV/clmul.ll | 4327 ---- llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll | 18639 ---------------- .../CodeGen/RISCV/rvv/fixed-vectors-clmul.ll | 14737 ------------ 7 files changed, 7 insertions(+), 37775 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index a33e2bdceafb8..4fdca9c1a4dbc 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -18436,54 +18436,6 @@ Example: %r = call i4 @llvm.clmul.i4(i4 -4, i4 2) ; %r = -8 %r = call i4 @llvm.clmul.i4(i4 -4, i4 -5) ; %r = 4 -'``llvm.clmulr.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" - -This is an overloaded intrinsic. You can use ``llvm.clmulr`` on any integer -or vectors of integer elements. - -:: - - declare i16 @llvm.clmulr.i16(i16 %a, i16 %b) - declare i32 @llvm.clmulr.i32(i32 %a, i32 %b) - declare i64 @llvm.clmulr.i64(i64 %a, i64 %b) - declare <4 x i32> @llvm.clmulr.v4i32(<4 x i32> %a, <4 x i32> %b) - -Overview: -""""""""" - -The '``llvm.clmulr``' family of intrinsic functions performs reversed -carry-less multiplication on the two arguments. - -Arguments: -"""""""""" - -The arguments may be any integer type or vector of integer type. Both arguments -and result must have the same type. - -Semantics: -"""""""""" - -The '``llvm.clmulr``' intrinsic computes reversed carry-less multiply of its -arguments. The vector variants operate lane-wise. - -.. code-block:: text - - clmulr(%a, %b) = bitreverse(clmul(bitreverse(%a), bitreverse(%b))) - -Example: -"""""""" - -.. code-block:: llvm - - %r = call i4 @llvm.clmulr.i4(i4 1, i4 2) ; %r = 0 - %r = call i4 @llvm.clmulr.i4(i4 5, i4 6) ; %r = 3 - %r = call i4 @llvm.clmulr.i4(i4 -4, i4 2) ; %r = 3 - %r = call i4 @llvm.clmulr.i4(i4 -4, i4 -5) ; %r = -2 - .. 
_int_overflow: Arithmetic with Overflow Intrinsics diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index fb8857cec2075..f0aed94529cfb 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1467,8 +1467,6 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrNoCreateUndefOrPoison] in [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_clmulr : DefaultAttrsIntrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, LLVMMatchType<0>]>; } let IntrProperties = [IntrNoMem, IntrSpeculatable, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 428eaeb3a1dde..27129b37e1922 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7279,20 +7279,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } return; } - case Intrinsic::clmul: - case Intrinsic::clmulr: { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - unsigned Opcode; - switch (Intrinsic) { - case Intrinsic::clmul: - Opcode = ISD::CLMUL; - break; - case Intrinsic::clmulr: - Opcode = ISD::CLMULR; - break; - } - setValue(&I, DAG.getNode(Opcode, sdl, Op1.getValueType(), Op1, Op2)); + case Intrinsic::clmul: { + SDValue X = getValue(I.getArgOperand(0)); + SDValue Y = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::CLMUL, sdl, X.getValueType(), X, Y)); return; } case Intrinsic::sadd_sat: { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 7db1dad5b4426..79627466bad0d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8324,20 +8324,15 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const { } break; } - case ISD::CLMULR: { - SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X); - SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X); - SDValue ResR = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev); - Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ResR); - break; - } + case ISD::CLMULR: case ISD::CLMULH: { EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 2 * BW); SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X); SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y); SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt); + unsigned ShtAmt = Node->getOpcode() == ISD::CLMULR ? 
BW - 1 : BW; SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul, - DAG.getShiftAmountConstant(BW, VT, DL)); + DAG.getShiftAmountConstant(ShtAmt, VT, DL)); Res = DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits); break; } diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll index f997e56d91178..da4f4d3075133 100644 --- a/llvm/test/CodeGen/RISCV/clmul.ll +++ b/llvm/test/CodeGen/RISCV/clmul.ll @@ -3241,4330 +3241,3 @@ define i16 @clmul_constfold_i16() nounwind { %res = call i16 @llvm.clmul.i16(i16 -2, i16 -1) ret i16 %res } - -define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { -; RV32IM-LABEL: clmulr_i4: -; RV32IM: # %bb.0: -; RV32IM-NEXT: addi sp, sp, -144 -; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill -; RV32IM-NEXT: srli a3, a0, 8 -; RV32IM-NEXT: lui s9, 16 -; RV32IM-NEXT: srli a4, a0, 24 -; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui a7, 61681 -; RV32IM-NEXT: lui ra, 209715 -; RV32IM-NEXT: lui a1, 349525 -; RV32IM-NEXT: li s0, 1 -; RV32IM-NEXT: lui t1, 1 -; RV32IM-NEXT: lui t2, 2 -; RV32IM-NEXT: lui t3, 4 -; RV32IM-NEXT: lui t4, 8 -; RV32IM-NEXT: lui t0, 32 -; RV32IM-NEXT: lui a6, 64 -; RV32IM-NEXT: lui a5, 128 -; RV32IM-NEXT: lui s1, 256 -; RV32IM-NEXT: lui t5, 512 -; RV32IM-NEXT: lui t6, 1024 -; RV32IM-NEXT: lui s4, 2048 -; RV32IM-NEXT: lui s2, 4096 -; RV32IM-NEXT: lui s3, 8192 -; RV32IM-NEXT: lui s7, 16384 -; RV32IM-NEXT: lui s5, 32768 -; RV32IM-NEXT: lui s6, 65536 -; RV32IM-NEXT: lui s11, 131072 -; RV32IM-NEXT: lui s8, 262144 -; RV32IM-NEXT: addi s10, s9, -256 -; RV32IM-NEXT: and a3, a3, s10 -; RV32IM-NEXT: or a3, a3, a4 -; RV32IM-NEXT: addi a7, a7, -241 -; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi a4, ra, 819 -; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi a1, a1, 1365 -; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: slli s0, s0, 11 -; RV32IM-NEXT: and a0, a0, s10 -; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: or a0, a0, a3 -; RV32IM-NEXT: srli a2, a0, 4 -; RV32IM-NEXT: and a0, a0, a7 -; RV32IM-NEXT: and a2, a2, a7 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a0, 2 -; RV32IM-NEXT: and a0, a0, a4 -; RV32IM-NEXT: and a2, a2, a4 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a0, 1 -; RV32IM-NEXT: and a0, a0, a1 -; RV32IM-NEXT: and a2, a2, a1 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a3, a2, a0 -; RV32IM-NEXT: andi a0, a3, 2 -; RV32IM-NEXT: andi a1, a3, 1 -; RV32IM-NEXT: and a4, a3, s0 -; RV32IM-NEXT: and a7, a3, t1 -; RV32IM-NEXT: and s0, a3, t2 -; RV32IM-NEXT: and ra, a3, t3 -; RV32IM-NEXT: and a2, a3, t4 -; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s9 -; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, t0 -; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a6, 
a3, a6 -; RV32IM-NEXT: and a5, a3, a5 -; RV32IM-NEXT: and s1, a3, s1 -; RV32IM-NEXT: sw s1, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, t5 -; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t6, a3, t6 -; RV32IM-NEXT: and a2, a3, s4 -; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s2, a3, s2 -; RV32IM-NEXT: and a2, a3, s3 -; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s7 -; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s5 -; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s6 -; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s11 -; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s8 -; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a2, 524288 -; RV32IM-NEXT: and a2, a3, a2 -; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a3, a0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a3, a1 -; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a0, a3, 4 -; RV32IM-NEXT: mul a0, a3, a0 -; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a1, a3, 8 -; RV32IM-NEXT: mul a0, a3, a1 -; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a2, a3, 16 -; RV32IM-NEXT: mul s9, a3, a2 -; RV32IM-NEXT: andi t0, a3, 32 -; RV32IM-NEXT: mul s6, a3, t0 -; RV32IM-NEXT: andi t1, a3, 64 -; RV32IM-NEXT: mul a0, a3, t1 -; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t2, a3, 128 -; RV32IM-NEXT: mul a0, a3, t2 -; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t2, a3, 256 -; RV32IM-NEXT: mul s1, a3, t2 -; RV32IM-NEXT: andi t3, a3, 512 -; RV32IM-NEXT: mul t5, a3, t3 -; RV32IM-NEXT: andi t4, a3, 1024 -; RV32IM-NEXT: mul s5, a3, t4 -; RV32IM-NEXT: mul s8, a3, a4 -; RV32IM-NEXT: mul a0, a3, a7 -; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul t2, a3, s0 -; RV32IM-NEXT: mul a7, a3, ra -; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s0, a3, a0 -; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s4, a3, a0 -; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s7, a3, a0 -; RV32IM-NEXT: mul a0, a3, a6 -; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a6, a3, a5 -; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a3, a0 -; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t1, a3, a0 -; RV32IM-NEXT: mul t4, a3, t6 -; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s3, a3, a0 -; RV32IM-NEXT: mul a2, a3, s2 -; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a3, a0 -; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a5, a3, a0 -; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t0, a3, a0 -; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t3, a3, a0 -; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t6, a3, a0 -; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s2, a3, a0 -; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a3, a3, a0 -; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, s11, a0 -; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; 
RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s11, s11, ra -; RV32IM-NEXT: xor s6, s9, s6 -; RV32IM-NEXT: xor t5, s1, t5 -; RV32IM-NEXT: xor a7, t2, a7 -; RV32IM-NEXT: xor a4, a6, a4 -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: xor a0, a0, s11 -; RV32IM-NEXT: lw a2, 4(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, s6, a2 -; RV32IM-NEXT: xor a6, t5, s5 -; RV32IM-NEXT: xor a7, a7, s0 -; RV32IM-NEXT: xor a4, a4, t1 -; RV32IM-NEXT: xor a1, a1, a5 -; RV32IM-NEXT: xor a0, a0, a2 -; RV32IM-NEXT: xor a2, a6, s8 -; RV32IM-NEXT: xor a5, a7, s4 -; RV32IM-NEXT: xor a4, a4, t4 -; RV32IM-NEXT: xor a1, a1, t0 -; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, a0, a6 -; RV32IM-NEXT: lw a6, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, a6 -; RV32IM-NEXT: xor a5, a5, s7 -; RV32IM-NEXT: xor a4, a4, s3 -; RV32IM-NEXT: xor a1, a1, t3 -; RV32IM-NEXT: lw a6, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a1, a1, t6 -; RV32IM-NEXT: xor a2, a0, a2 -; RV32IM-NEXT: xor a2, a2, a5 -; RV32IM-NEXT: slli a0, a0, 24 -; RV32IM-NEXT: xor a1, a1, s2 -; RV32IM-NEXT: xor a2, a2, a4 -; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: and a3, a2, s10 -; RV32IM-NEXT: srli a4, a2, 8 -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, s10 -; RV32IM-NEXT: srli a1, a1, 24 -; RV32IM-NEXT: or a0, a0, a3 -; RV32IM-NEXT: or a1, a2, a1 -; RV32IM-NEXT: or a0, a0, a1 -; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload -; RV32IM-NEXT: addi sp, sp, 144 -; RV32IM-NEXT: ret -; -; RV64IM-LABEL: clmulr_i4: -; RV64IM: # %bb.0: -; RV64IM-NEXT: addi sp, sp, -448 -; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s10, 352(sp) # 
8-byte Folded Spill -; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a2, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 -; RV64IM-NEXT: li a3, 255 -; RV64IM-NEXT: srli a5, a0, 40 -; RV64IM-NEXT: lui s3, 16 -; RV64IM-NEXT: srli s0, a0, 56 -; RV64IM-NEXT: srliw t2, a0, 24 -; RV64IM-NEXT: slli t0, a0, 56 -; RV64IM-NEXT: lui t3, 61681 -; RV64IM-NEXT: lui t4, 209715 -; RV64IM-NEXT: lui t6, 349525 -; RV64IM-NEXT: li a7, 1 -; RV64IM-NEXT: lui s5, 2 -; RV64IM-NEXT: lui t1, 4 -; RV64IM-NEXT: lui a4, 128 -; RV64IM-NEXT: lui s7, 256 -; RV64IM-NEXT: lui s8, 4096 -; RV64IM-NEXT: lui s10, 8192 -; RV64IM-NEXT: lui a1, 4080 -; RV64IM-NEXT: and a2, a2, a1 -; RV64IM-NEXT: slli a3, a3, 24 -; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill -; RV64IM-NEXT: addi s1, s3, -256 -; RV64IM-NEXT: and t5, a0, a1 -; RV64IM-NEXT: slli a1, t2, 32 -; RV64IM-NEXT: addi s9, t3, -241 -; RV64IM-NEXT: addi t4, t4, 819 -; RV64IM-NEXT: addi t2, t6, 1365 -; RV64IM-NEXT: slli t3, a7, 11 -; RV64IM-NEXT: slli s11, a7, 32 -; RV64IM-NEXT: slli ra, a7, 33 -; RV64IM-NEXT: slli t6, a7, 34 -; RV64IM-NEXT: slli s2, a7, 35 -; RV64IM-NEXT: slli s4, a7, 36 -; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a6, a3 -; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: slli a3, a7, 37 -; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a5, s1 -; RV64IM-NEXT: or a3, a3, s0 -; RV64IM-NEXT: slli a5, a7, 38 -; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t5, t5, 24 -; RV64IM-NEXT: and a0, a0, s1 -; RV64IM-NEXT: or a1, t5, a1 -; RV64IM-NEXT: slli a5, s9, 32 -; RV64IM-NEXT: add a5, s9, a5 -; RV64IM-NEXT: slli s0, t4, 32 -; RV64IM-NEXT: add t4, t4, s0 -; RV64IM-NEXT: slli s4, t2, 32 -; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: add t2, t2, s4 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a0, t0, a0 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, a5 -; RV64IM-NEXT: and a1, a1, a5 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t4 -; RV64IM-NEXT: and a1, a1, t4 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t2 -; RV64IM-NEXT: and a1, a1, t2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or t0, a1, a0 -; RV64IM-NEXT: andi a0, t0, 2 -; RV64IM-NEXT: andi a1, t0, 1 -; RV64IM-NEXT: andi a2, t0, 4 -; RV64IM-NEXT: andi a3, t0, 8 -; RV64IM-NEXT: andi a5, t0, 16 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 32 -; RV64IM-NEXT: mul a1, t0, a2 -; RV64IM-NEXT: mul a2, t0, a3 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a1, t0, 256 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a2, a0 -; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 512 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t4, a7, 39 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: and a1, t0, t1 -; RV64IM-NEXT: mul a0, t0, 
a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 40 -; RV64IM-NEXT: and a1, t0, a4 -; RV64IM-NEXT: and a2, t0, s7 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 41 -; RV64IM-NEXT: and a2, t0, s8 -; RV64IM-NEXT: and a3, t0, s10 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 48 -; RV64IM-NEXT: and a3, t0, s11 -; RV64IM-NEXT: and a4, t0, ra -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: mul a4, t0, a4 -; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a3, a7, 49 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 56 -; RV64IM-NEXT: and a1, t0, a2 -; RV64IM-NEXT: and a2, t0, a3 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 57 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 42 -; RV64IM-NEXT: slli ra, a7, 43 -; RV64IM-NEXT: slli a3, a7, 44 -; RV64IM-NEXT: slli a4, a7, 45 -; RV64IM-NEXT: slli t5, a7, 46 -; RV64IM-NEXT: slli s0, a7, 47 -; RV64IM-NEXT: slli s1, a7, 50 -; RV64IM-NEXT: slli a0, a7, 51 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 52 -; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 53 -; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 54 -; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 55 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 58 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 59 -; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 60 -; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 61 -; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a7, a7, 62 -; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, t3 -; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s7, 1 -; RV64IM-NEXT: and a0, t0, s7 -; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s8, 8 -; RV64IM-NEXT: and a0, t0, s8 -; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s3 -; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s6, 32 -; RV64IM-NEXT: and a0, t0, s6 -; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s10, 64 -; RV64IM-NEXT: and a0, t0, s10 -; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s11, 512 -; RV64IM-NEXT: and a0, t0, s11 -; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s4, 1024 -; RV64IM-NEXT: and a0, t0, s4 -; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s5, 2048 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: sd a0, 
40(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s9, 16384 -; RV64IM-NEXT: and a0, t0, s9 -; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui a5, 32768 -; RV64IM-NEXT: and a5, t0, a5 -; RV64IM-NEXT: lui a6, 65536 -; RV64IM-NEXT: and a6, t0, a6 -; RV64IM-NEXT: lui t1, 131072 -; RV64IM-NEXT: and t1, t0, t1 -; RV64IM-NEXT: lui t2, 262144 -; RV64IM-NEXT: and t2, t0, t2 -; RV64IM-NEXT: and a0, t0, t6 -; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s2 -; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, t4 -; RV64IM-NEXT: and a7, t0, a2 -; RV64IM-NEXT: and ra, t0, ra -; RV64IM-NEXT: and t3, t0, a3 -; RV64IM-NEXT: and t4, t0, a4 -; RV64IM-NEXT: and t5, t0, t5 -; RV64IM-NEXT: and t6, t0, s0 -; RV64IM-NEXT: and s0, t0, s1 -; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s1, t0, a2 -; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s2, t0, a2 -; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s3, t0, a2 -; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s4, t0, a2 -; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s5, t0, a2 -; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s6, t0, a2 -; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s7, t0, a2 -; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s8, t0, a2 -; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s9, t0, a2 -; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s10, t0, a2 -; RV64IM-NEXT: andi s11, t0, 64 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 128 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 1024 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul s11, t0, a2 -; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a4, t0, a2 -; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload -; 
RV64IM-NEXT: mul a3, t0, a2 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, a6 -; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t1 -; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t2 -; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srliw t2, t0, 31 -; RV64IM-NEXT: slli t2, t2, 31 -; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a5, t0, a5 -; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t1, t0, a6 -; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a6, t0, a6 -; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a1 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a7, t0, a7 -; RV64IM-NEXT: mul ra, t0, ra -; RV64IM-NEXT: mul a6, t0, t3 -; RV64IM-NEXT: mul t4, t0, t4 -; RV64IM-NEXT: mul t5, t0, t5 -; RV64IM-NEXT: mul a0, t0, t6 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul t6, t0, s0 -; RV64IM-NEXT: mul s0, t0, s1 -; RV64IM-NEXT: mul s1, t0, s2 -; RV64IM-NEXT: mul s2, t0, s3 -; RV64IM-NEXT: mul s3, t0, s4 -; RV64IM-NEXT: mul s4, t0, s5 -; RV64IM-NEXT: mul s5, t0, s6 -; RV64IM-NEXT: mul s6, t0, s7 -; RV64IM-NEXT: mul s7, t0, s8 -; RV64IM-NEXT: mul s8, t0, s9 -; RV64IM-NEXT: mul s9, t0, s10 -; RV64IM-NEXT: srli s10, t0, 63 -; RV64IM-NEXT: slli s10, s10, 63 -; RV64IM-NEXT: mul t2, t0, t2 -; RV64IM-NEXT: mul t0, t0, s10 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, a0, a1 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s11, t3, s11 -; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, t3, a4 -; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, t3, a3 -; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, t3, a2 -; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, t3, a7 -; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t6, t3, t6 -; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, t3, s5 -; RV64IM-NEXT: xor a0, s10, a0 -; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, s11, t3 -; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, a5 -; RV64IM-NEXT: xor a5, a7, ra -; RV64IM-NEXT: xor a7, t6, s0 -; RV64IM-NEXT: xor t6, s5, s6 -; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, t3 -; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, s10, t3 -; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; 
RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: xor a6, a7, s1 -; RV64IM-NEXT: xor a7, t6, s7 -; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, s0, t1 -; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t3 -; RV64IM-NEXT: xor a5, a5, t4 -; RV64IM-NEXT: xor a6, a6, s2 -; RV64IM-NEXT: xor a7, a7, s8 -; RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: xor a1, a1, t1 -; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t1 -; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, t5 -; RV64IM-NEXT: xor a6, a6, s3 -; RV64IM-NEXT: xor a7, a7, s9 -; RV64IM-NEXT: xor a1, a1, a4 -; RV64IM-NEXT: xor a3, a3, t2 -; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a4 -; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a5, a4 -; RV64IM-NEXT: xor a5, a6, s4 -; RV64IM-NEXT: slli a0, a0, 56 -; RV64IM-NEXT: xor a6, a7, t0 -; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a7, a1, t0 -; RV64IM-NEXT: xor a1, a1, a3 -; RV64IM-NEXT: slli a7, a7, 40 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: or a0, a0, a7 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a2, a1, a7 -; RV64IM-NEXT: xor a4, a1, a4 -; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: slli a2, a2, 24 -; RV64IM-NEXT: xor a5, a4, a5 -; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, a1, a3 -; RV64IM-NEXT: srli a4, a4, 24 -; RV64IM-NEXT: srliw a3, a5, 24 -; RV64IM-NEXT: and a4, a4, a7 -; RV64IM-NEXT: srli a7, a5, 40 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: slli a3, a3, 32 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: and a4, a7, t0 -; RV64IM-NEXT: srli a5, a5, 56 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a4, a4, a5 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: addi sp, sp, 448 -; RV64IM-NEXT: ret - %res = call i4 @llvm.clmulr.i4(i4 %a, i4 %b) - ret i4 
%res -} - -define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { -; RV32IM-LABEL: clmulr_i8: -; RV32IM: # %bb.0: -; RV32IM-NEXT: addi sp, sp, -144 -; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill -; RV32IM-NEXT: srli a3, a0, 8 -; RV32IM-NEXT: lui s9, 16 -; RV32IM-NEXT: srli a4, a0, 24 -; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui a7, 61681 -; RV32IM-NEXT: lui ra, 209715 -; RV32IM-NEXT: lui a1, 349525 -; RV32IM-NEXT: li s0, 1 -; RV32IM-NEXT: lui t1, 1 -; RV32IM-NEXT: lui t2, 2 -; RV32IM-NEXT: lui t3, 4 -; RV32IM-NEXT: lui t4, 8 -; RV32IM-NEXT: lui t0, 32 -; RV32IM-NEXT: lui a6, 64 -; RV32IM-NEXT: lui a5, 128 -; RV32IM-NEXT: lui s1, 256 -; RV32IM-NEXT: lui t5, 512 -; RV32IM-NEXT: lui t6, 1024 -; RV32IM-NEXT: lui s4, 2048 -; RV32IM-NEXT: lui s2, 4096 -; RV32IM-NEXT: lui s3, 8192 -; RV32IM-NEXT: lui s7, 16384 -; RV32IM-NEXT: lui s5, 32768 -; RV32IM-NEXT: lui s6, 65536 -; RV32IM-NEXT: lui s11, 131072 -; RV32IM-NEXT: lui s8, 262144 -; RV32IM-NEXT: addi s10, s9, -256 -; RV32IM-NEXT: and a3, a3, s10 -; RV32IM-NEXT: or a3, a3, a4 -; RV32IM-NEXT: addi a7, a7, -241 -; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi a4, ra, 819 -; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi a1, a1, 1365 -; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: slli s0, s0, 11 -; RV32IM-NEXT: and a0, a0, s10 -; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: or a0, a0, a3 -; RV32IM-NEXT: srli a2, a0, 4 -; RV32IM-NEXT: and a0, a0, a7 -; RV32IM-NEXT: and a2, a2, a7 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a0, 2 -; RV32IM-NEXT: and a0, a0, a4 -; RV32IM-NEXT: and a2, a2, a4 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a0, 1 -; RV32IM-NEXT: and a0, a0, a1 -; RV32IM-NEXT: and a2, a2, a1 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a3, a2, a0 -; RV32IM-NEXT: andi a0, a3, 2 -; RV32IM-NEXT: andi a1, a3, 1 -; RV32IM-NEXT: and a4, a3, s0 -; RV32IM-NEXT: and a7, a3, t1 -; RV32IM-NEXT: and s0, a3, t2 -; RV32IM-NEXT: and ra, a3, t3 -; RV32IM-NEXT: and a2, a3, t4 -; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s9 -; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, t0 -; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a6, a3, a6 -; RV32IM-NEXT: and a5, a3, a5 -; RV32IM-NEXT: and s1, a3, s1 -; RV32IM-NEXT: sw s1, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, t5 -; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t6, a3, t6 -; RV32IM-NEXT: and a2, a3, s4 -; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s2, a3, s2 -; RV32IM-NEXT: and a2, a3, s3 -; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s7 -; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s5 
-; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s6 -; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s11 -; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s8 -; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a2, 524288 -; RV32IM-NEXT: and a2, a3, a2 -; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a3, a0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a3, a1 -; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a0, a3, 4 -; RV32IM-NEXT: mul a0, a3, a0 -; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a1, a3, 8 -; RV32IM-NEXT: mul a0, a3, a1 -; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a2, a3, 16 -; RV32IM-NEXT: mul s9, a3, a2 -; RV32IM-NEXT: andi t0, a3, 32 -; RV32IM-NEXT: mul s6, a3, t0 -; RV32IM-NEXT: andi t1, a3, 64 -; RV32IM-NEXT: mul a0, a3, t1 -; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t2, a3, 128 -; RV32IM-NEXT: mul a0, a3, t2 -; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t2, a3, 256 -; RV32IM-NEXT: mul s1, a3, t2 -; RV32IM-NEXT: andi t3, a3, 512 -; RV32IM-NEXT: mul t5, a3, t3 -; RV32IM-NEXT: andi t4, a3, 1024 -; RV32IM-NEXT: mul s5, a3, t4 -; RV32IM-NEXT: mul s8, a3, a4 -; RV32IM-NEXT: mul a0, a3, a7 -; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul t2, a3, s0 -; RV32IM-NEXT: mul a7, a3, ra -; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s0, a3, a0 -; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s4, a3, a0 -; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s7, a3, a0 -; RV32IM-NEXT: mul a0, a3, a6 -; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a6, a3, a5 -; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a3, a0 -; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t1, a3, a0 -; RV32IM-NEXT: mul t4, a3, t6 -; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s3, a3, a0 -; RV32IM-NEXT: mul a2, a3, s2 -; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a3, a0 -; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a5, a3, a0 -; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t0, a3, a0 -; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t3, a3, a0 -; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t6, a3, a0 -; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s2, a3, a0 -; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a3, a3, a0 -; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, s11, a0 -; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s11, s11, ra -; RV32IM-NEXT: xor s6, s9, s6 -; RV32IM-NEXT: xor t5, s1, t5 -; RV32IM-NEXT: xor a7, t2, a7 -; RV32IM-NEXT: xor a4, a6, a4 -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: xor a0, a0, s11 -; RV32IM-NEXT: lw a2, 4(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, s6, a2 -; RV32IM-NEXT: xor a6, t5, s5 -; RV32IM-NEXT: xor a7, a7, s0 -; RV32IM-NEXT: xor a4, a4, t1 -; RV32IM-NEXT: xor a1, a1, a5 -; RV32IM-NEXT: xor a0, a0, a2 -; RV32IM-NEXT: xor a2, a6, s8 -; RV32IM-NEXT: xor 
a5, a7, s4 -; RV32IM-NEXT: xor a4, a4, t4 -; RV32IM-NEXT: xor a1, a1, t0 -; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, a0, a6 -; RV32IM-NEXT: lw a6, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, a6 -; RV32IM-NEXT: xor a5, a5, s7 -; RV32IM-NEXT: xor a4, a4, s3 -; RV32IM-NEXT: xor a1, a1, t3 -; RV32IM-NEXT: lw a6, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a1, a1, t6 -; RV32IM-NEXT: xor a2, a0, a2 -; RV32IM-NEXT: xor a2, a2, a5 -; RV32IM-NEXT: slli a0, a0, 24 -; RV32IM-NEXT: xor a1, a1, s2 -; RV32IM-NEXT: xor a2, a2, a4 -; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: and a3, a2, s10 -; RV32IM-NEXT: srli a4, a2, 8 -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, s10 -; RV32IM-NEXT: srli a1, a1, 24 -; RV32IM-NEXT: or a0, a0, a3 -; RV32IM-NEXT: or a1, a2, a1 -; RV32IM-NEXT: or a0, a0, a1 -; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload -; RV32IM-NEXT: addi sp, sp, 144 -; RV32IM-NEXT: ret -; -; RV64IM-LABEL: clmulr_i8: -; RV64IM: # %bb.0: -; RV64IM-NEXT: addi sp, sp, -448 -; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a2, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 -; RV64IM-NEXT: li a3, 255 -; RV64IM-NEXT: srli a5, a0, 40 -; RV64IM-NEXT: lui s3, 16 -; RV64IM-NEXT: srli s0, a0, 56 -; RV64IM-NEXT: srliw t2, a0, 24 -; RV64IM-NEXT: slli t0, a0, 56 -; RV64IM-NEXT: lui t3, 61681 -; RV64IM-NEXT: lui t4, 209715 -; RV64IM-NEXT: lui t6, 349525 -; RV64IM-NEXT: li a7, 1 -; RV64IM-NEXT: lui s5, 2 -; RV64IM-NEXT: lui t1, 4 -; RV64IM-NEXT: lui a4, 128 -; RV64IM-NEXT: lui s7, 256 -; RV64IM-NEXT: 
lui s8, 4096 -; RV64IM-NEXT: lui s10, 8192 -; RV64IM-NEXT: lui a1, 4080 -; RV64IM-NEXT: and a2, a2, a1 -; RV64IM-NEXT: slli a3, a3, 24 -; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill -; RV64IM-NEXT: addi s1, s3, -256 -; RV64IM-NEXT: and t5, a0, a1 -; RV64IM-NEXT: slli a1, t2, 32 -; RV64IM-NEXT: addi s9, t3, -241 -; RV64IM-NEXT: addi t4, t4, 819 -; RV64IM-NEXT: addi t2, t6, 1365 -; RV64IM-NEXT: slli t3, a7, 11 -; RV64IM-NEXT: slli s11, a7, 32 -; RV64IM-NEXT: slli ra, a7, 33 -; RV64IM-NEXT: slli t6, a7, 34 -; RV64IM-NEXT: slli s2, a7, 35 -; RV64IM-NEXT: slli s4, a7, 36 -; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a6, a3 -; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: slli a3, a7, 37 -; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a5, s1 -; RV64IM-NEXT: or a3, a3, s0 -; RV64IM-NEXT: slli a5, a7, 38 -; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t5, t5, 24 -; RV64IM-NEXT: and a0, a0, s1 -; RV64IM-NEXT: or a1, t5, a1 -; RV64IM-NEXT: slli a5, s9, 32 -; RV64IM-NEXT: add a5, s9, a5 -; RV64IM-NEXT: slli s0, t4, 32 -; RV64IM-NEXT: add t4, t4, s0 -; RV64IM-NEXT: slli s4, t2, 32 -; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: add t2, t2, s4 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a0, t0, a0 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, a5 -; RV64IM-NEXT: and a1, a1, a5 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t4 -; RV64IM-NEXT: and a1, a1, t4 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t2 -; RV64IM-NEXT: and a1, a1, t2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or t0, a1, a0 -; RV64IM-NEXT: andi a0, t0, 2 -; RV64IM-NEXT: andi a1, t0, 1 -; RV64IM-NEXT: andi a2, t0, 4 -; RV64IM-NEXT: andi a3, t0, 8 -; RV64IM-NEXT: andi a5, t0, 16 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 32 -; RV64IM-NEXT: mul a1, t0, a2 -; RV64IM-NEXT: mul a2, t0, a3 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a1, t0, 256 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a2, a0 -; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 512 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t4, a7, 39 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: and a1, t0, t1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 40 -; RV64IM-NEXT: and a1, t0, a4 -; RV64IM-NEXT: and a2, t0, s7 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 41 -; RV64IM-NEXT: and a2, t0, s8 -; RV64IM-NEXT: and a3, t0, s10 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: 
sd a2, 224(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 48 -; RV64IM-NEXT: and a3, t0, s11 -; RV64IM-NEXT: and a4, t0, ra -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: mul a4, t0, a4 -; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a3, a7, 49 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 56 -; RV64IM-NEXT: and a1, t0, a2 -; RV64IM-NEXT: and a2, t0, a3 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 57 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 42 -; RV64IM-NEXT: slli ra, a7, 43 -; RV64IM-NEXT: slli a3, a7, 44 -; RV64IM-NEXT: slli a4, a7, 45 -; RV64IM-NEXT: slli t5, a7, 46 -; RV64IM-NEXT: slli s0, a7, 47 -; RV64IM-NEXT: slli s1, a7, 50 -; RV64IM-NEXT: slli a0, a7, 51 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 52 -; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 53 -; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 54 -; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 55 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 58 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 59 -; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 60 -; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 61 -; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a7, a7, 62 -; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, t3 -; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s7, 1 -; RV64IM-NEXT: and a0, t0, s7 -; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s8, 8 -; RV64IM-NEXT: and a0, t0, s8 -; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s3 -; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s6, 32 -; RV64IM-NEXT: and a0, t0, s6 -; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s10, 64 -; RV64IM-NEXT: and a0, t0, s10 -; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s11, 512 -; RV64IM-NEXT: and a0, t0, s11 -; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s4, 1024 -; RV64IM-NEXT: and a0, t0, s4 -; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s5, 2048 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s9, 16384 -; RV64IM-NEXT: and a0, t0, s9 -; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui a5, 32768 -; RV64IM-NEXT: and a5, t0, a5 -; RV64IM-NEXT: lui a6, 65536 -; RV64IM-NEXT: and a6, t0, a6 -; RV64IM-NEXT: lui t1, 131072 -; RV64IM-NEXT: and t1, t0, t1 -; RV64IM-NEXT: lui t2, 262144 -; RV64IM-NEXT: and t2, t0, t2 -; RV64IM-NEXT: and a0, t0, t6 -; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s2 -; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; 
RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, t4 -; RV64IM-NEXT: and a7, t0, a2 -; RV64IM-NEXT: and ra, t0, ra -; RV64IM-NEXT: and t3, t0, a3 -; RV64IM-NEXT: and t4, t0, a4 -; RV64IM-NEXT: and t5, t0, t5 -; RV64IM-NEXT: and t6, t0, s0 -; RV64IM-NEXT: and s0, t0, s1 -; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s1, t0, a2 -; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s2, t0, a2 -; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s3, t0, a2 -; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s4, t0, a2 -; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s5, t0, a2 -; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s6, t0, a2 -; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s7, t0, a2 -; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s8, t0, a2 -; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s9, t0, a2 -; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s10, t0, a2 -; RV64IM-NEXT: andi s11, t0, 64 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 128 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 1024 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul s11, t0, a2 -; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a4, t0, a2 -; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a3, t0, a2 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, a6 -; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t1 -; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t2 -; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srliw t2, t0, 31 -; RV64IM-NEXT: slli t2, t2, 31 -; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded 
Reload -; RV64IM-NEXT: mul a5, t0, a5 -; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t1, t0, a6 -; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a6, t0, a6 -; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a1 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a7, t0, a7 -; RV64IM-NEXT: mul ra, t0, ra -; RV64IM-NEXT: mul a6, t0, t3 -; RV64IM-NEXT: mul t4, t0, t4 -; RV64IM-NEXT: mul t5, t0, t5 -; RV64IM-NEXT: mul a0, t0, t6 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul t6, t0, s0 -; RV64IM-NEXT: mul s0, t0, s1 -; RV64IM-NEXT: mul s1, t0, s2 -; RV64IM-NEXT: mul s2, t0, s3 -; RV64IM-NEXT: mul s3, t0, s4 -; RV64IM-NEXT: mul s4, t0, s5 -; RV64IM-NEXT: mul s5, t0, s6 -; RV64IM-NEXT: mul s6, t0, s7 -; RV64IM-NEXT: mul s7, t0, s8 -; RV64IM-NEXT: mul s8, t0, s9 -; RV64IM-NEXT: mul s9, t0, s10 -; RV64IM-NEXT: srli s10, t0, 63 -; RV64IM-NEXT: slli s10, s10, 63 -; RV64IM-NEXT: mul t2, t0, t2 -; RV64IM-NEXT: mul t0, t0, s10 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, a0, a1 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s11, t3, s11 -; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, t3, a4 -; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, t3, a3 -; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, t3, a2 -; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, t3, a7 -; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t6, t3, t6 -; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, t3, s5 -; RV64IM-NEXT: xor a0, s10, a0 -; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, s11, t3 -; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, a5 -; RV64IM-NEXT: xor a5, a7, ra -; RV64IM-NEXT: xor a7, t6, s0 -; RV64IM-NEXT: xor t6, s5, s6 -; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, t3 -; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, s10, t3 -; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: xor a6, a7, s1 -; RV64IM-NEXT: xor a7, t6, s7 -; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, s0, t1 -; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t3 -; RV64IM-NEXT: xor a5, a5, t4 -; RV64IM-NEXT: xor a6, a6, s2 -; RV64IM-NEXT: xor a7, a7, s8 -; 
RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: xor a1, a1, t1 -; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t1 -; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, t5 -; RV64IM-NEXT: xor a6, a6, s3 -; RV64IM-NEXT: xor a7, a7, s9 -; RV64IM-NEXT: xor a1, a1, a4 -; RV64IM-NEXT: xor a3, a3, t2 -; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a4 -; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a5, a4 -; RV64IM-NEXT: xor a5, a6, s4 -; RV64IM-NEXT: slli a0, a0, 56 -; RV64IM-NEXT: xor a6, a7, t0 -; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a7, a1, t0 -; RV64IM-NEXT: xor a1, a1, a3 -; RV64IM-NEXT: slli a7, a7, 40 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: or a0, a0, a7 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a2, a1, a7 -; RV64IM-NEXT: xor a4, a1, a4 -; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: slli a2, a2, 24 -; RV64IM-NEXT: xor a5, a4, a5 -; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, a1, a3 -; RV64IM-NEXT: srli a4, a4, 24 -; RV64IM-NEXT: srliw a3, a5, 24 -; RV64IM-NEXT: and a4, a4, a7 -; RV64IM-NEXT: srli a7, a5, 40 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: slli a3, a3, 32 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: and a4, a7, t0 -; RV64IM-NEXT: srli a5, a5, 56 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a4, a4, a5 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: addi sp, sp, 448 -; RV64IM-NEXT: ret - %res = call i8 @llvm.clmulr.i8(i8 %a, i8 %b) - ret i8 %res -} - -define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { -; RV32IM-LABEL: clmulr_i16: -; RV32IM: # %bb.0: -; RV32IM-NEXT: addi sp, sp, -144 -; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s6, 112(sp) # 4-byte 
Folded Spill -; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill -; RV32IM-NEXT: srli a3, a0, 8 -; RV32IM-NEXT: lui s9, 16 -; RV32IM-NEXT: srli a4, a0, 24 -; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui a7, 61681 -; RV32IM-NEXT: lui ra, 209715 -; RV32IM-NEXT: lui a1, 349525 -; RV32IM-NEXT: li s0, 1 -; RV32IM-NEXT: lui t1, 1 -; RV32IM-NEXT: lui t2, 2 -; RV32IM-NEXT: lui t3, 4 -; RV32IM-NEXT: lui t4, 8 -; RV32IM-NEXT: lui t0, 32 -; RV32IM-NEXT: lui a6, 64 -; RV32IM-NEXT: lui a5, 128 -; RV32IM-NEXT: lui s1, 256 -; RV32IM-NEXT: lui t5, 512 -; RV32IM-NEXT: lui t6, 1024 -; RV32IM-NEXT: lui s4, 2048 -; RV32IM-NEXT: lui s2, 4096 -; RV32IM-NEXT: lui s3, 8192 -; RV32IM-NEXT: lui s7, 16384 -; RV32IM-NEXT: lui s5, 32768 -; RV32IM-NEXT: lui s6, 65536 -; RV32IM-NEXT: lui s11, 131072 -; RV32IM-NEXT: lui s8, 262144 -; RV32IM-NEXT: addi s10, s9, -256 -; RV32IM-NEXT: and a3, a3, s10 -; RV32IM-NEXT: or a3, a3, a4 -; RV32IM-NEXT: addi a7, a7, -241 -; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi a4, ra, 819 -; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi a1, a1, 1365 -; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: slli s0, s0, 11 -; RV32IM-NEXT: and a0, a0, s10 -; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: or a0, a0, a3 -; RV32IM-NEXT: srli a2, a0, 4 -; RV32IM-NEXT: and a0, a0, a7 -; RV32IM-NEXT: and a2, a2, a7 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a0, 2 -; RV32IM-NEXT: and a0, a0, a4 -; RV32IM-NEXT: and a2, a2, a4 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a0, 1 -; RV32IM-NEXT: and a0, a0, a1 -; RV32IM-NEXT: and a2, a2, a1 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a3, a2, a0 -; RV32IM-NEXT: andi a0, a3, 2 -; RV32IM-NEXT: andi a1, a3, 1 -; RV32IM-NEXT: and a4, a3, s0 -; RV32IM-NEXT: and a7, a3, t1 -; RV32IM-NEXT: and s0, a3, t2 -; RV32IM-NEXT: and ra, a3, t3 -; RV32IM-NEXT: and a2, a3, t4 -; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s9 -; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, t0 -; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a6, a3, a6 -; RV32IM-NEXT: and a5, a3, a5 -; RV32IM-NEXT: and s1, a3, s1 -; RV32IM-NEXT: sw s1, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, t5 -; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t6, a3, t6 -; RV32IM-NEXT: and a2, a3, s4 -; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s2, a3, s2 -; RV32IM-NEXT: and a2, a3, s3 -; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s7 -; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s5 -; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s6 -; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s11 -; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s8 -; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a2, 524288 -; RV32IM-NEXT: and a2, a3, a2 -; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a3, a0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a3, a1 -; RV32IM-NEXT: sw 
a0, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a0, a3, 4 -; RV32IM-NEXT: mul a0, a3, a0 -; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a1, a3, 8 -; RV32IM-NEXT: mul a0, a3, a1 -; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a2, a3, 16 -; RV32IM-NEXT: mul s9, a3, a2 -; RV32IM-NEXT: andi t0, a3, 32 -; RV32IM-NEXT: mul s6, a3, t0 -; RV32IM-NEXT: andi t1, a3, 64 -; RV32IM-NEXT: mul a0, a3, t1 -; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t2, a3, 128 -; RV32IM-NEXT: mul a0, a3, t2 -; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t2, a3, 256 -; RV32IM-NEXT: mul s1, a3, t2 -; RV32IM-NEXT: andi t3, a3, 512 -; RV32IM-NEXT: mul t5, a3, t3 -; RV32IM-NEXT: andi t4, a3, 1024 -; RV32IM-NEXT: mul s5, a3, t4 -; RV32IM-NEXT: mul s8, a3, a4 -; RV32IM-NEXT: mul a0, a3, a7 -; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul t2, a3, s0 -; RV32IM-NEXT: mul a7, a3, ra -; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s0, a3, a0 -; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s4, a3, a0 -; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s7, a3, a0 -; RV32IM-NEXT: mul a0, a3, a6 -; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a6, a3, a5 -; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a3, a0 -; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t1, a3, a0 -; RV32IM-NEXT: mul t4, a3, t6 -; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s3, a3, a0 -; RV32IM-NEXT: mul a2, a3, s2 -; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a3, a0 -; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a5, a3, a0 -; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t0, a3, a0 -; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t3, a3, a0 -; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t6, a3, a0 -; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s2, a3, a0 -; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a3, a3, a0 -; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, s11, a0 -; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s11, s11, ra -; RV32IM-NEXT: xor s6, s9, s6 -; RV32IM-NEXT: xor t5, s1, t5 -; RV32IM-NEXT: xor a7, t2, a7 -; RV32IM-NEXT: xor a4, a6, a4 -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: xor a0, a0, s11 -; RV32IM-NEXT: lw a2, 4(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, s6, a2 -; RV32IM-NEXT: xor a6, t5, s5 -; RV32IM-NEXT: xor a7, a7, s0 -; RV32IM-NEXT: xor a4, a4, t1 -; RV32IM-NEXT: xor a1, a1, a5 -; RV32IM-NEXT: xor a0, a0, a2 -; RV32IM-NEXT: xor a2, a6, s8 -; RV32IM-NEXT: xor a5, a7, s4 -; RV32IM-NEXT: xor a4, a4, t4 -; RV32IM-NEXT: xor a1, a1, t0 -; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, a0, a6 -; RV32IM-NEXT: lw a6, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, a6 -; RV32IM-NEXT: xor a5, a5, s7 -; RV32IM-NEXT: xor a4, a4, s3 -; RV32IM-NEXT: xor a1, a1, t3 -; RV32IM-NEXT: lw a6, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a1, a1, t6 -; RV32IM-NEXT: xor a2, a0, a2 -; RV32IM-NEXT: xor a2, a2, a5 -; RV32IM-NEXT: slli a0, a0, 24 -; 
RV32IM-NEXT: xor a1, a1, s2 -; RV32IM-NEXT: xor a2, a2, a4 -; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: and a3, a2, s10 -; RV32IM-NEXT: srli a4, a2, 8 -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, s10 -; RV32IM-NEXT: srli a1, a1, 24 -; RV32IM-NEXT: or a0, a0, a3 -; RV32IM-NEXT: or a1, a2, a1 -; RV32IM-NEXT: or a0, a0, a1 -; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload -; RV32IM-NEXT: addi sp, sp, 144 -; RV32IM-NEXT: ret -; -; RV64IM-LABEL: clmulr_i16: -; RV64IM: # %bb.0: -; RV64IM-NEXT: addi sp, sp, -448 -; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a2, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 -; RV64IM-NEXT: li a3, 255 -; RV64IM-NEXT: srli a5, a0, 40 -; RV64IM-NEXT: lui s3, 16 -; RV64IM-NEXT: srli s0, a0, 56 -; RV64IM-NEXT: srliw t2, a0, 24 -; RV64IM-NEXT: slli t0, a0, 56 -; RV64IM-NEXT: lui t3, 61681 -; RV64IM-NEXT: lui t4, 209715 -; RV64IM-NEXT: lui t6, 349525 -; RV64IM-NEXT: li a7, 1 -; RV64IM-NEXT: lui s5, 2 -; RV64IM-NEXT: lui t1, 4 -; RV64IM-NEXT: lui a4, 128 -; RV64IM-NEXT: lui s7, 256 -; RV64IM-NEXT: lui s8, 4096 -; RV64IM-NEXT: lui s10, 8192 -; RV64IM-NEXT: lui a1, 4080 -; RV64IM-NEXT: and a2, a2, a1 -; RV64IM-NEXT: slli a3, a3, 24 -; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill -; RV64IM-NEXT: addi s1, s3, -256 -; RV64IM-NEXT: and t5, a0, a1 -; RV64IM-NEXT: slli a1, t2, 32 -; RV64IM-NEXT: addi s9, t3, -241 -; RV64IM-NEXT: addi t4, t4, 819 -; RV64IM-NEXT: addi t2, t6, 1365 -; RV64IM-NEXT: slli t3, a7, 11 -; RV64IM-NEXT: slli s11, a7, 32 -; RV64IM-NEXT: slli ra, a7, 33 -; RV64IM-NEXT: slli t6, a7, 34 -; RV64IM-NEXT: slli s2, a7, 
35 -; RV64IM-NEXT: slli s4, a7, 36 -; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a6, a3 -; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: slli a3, a7, 37 -; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a5, s1 -; RV64IM-NEXT: or a3, a3, s0 -; RV64IM-NEXT: slli a5, a7, 38 -; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t5, t5, 24 -; RV64IM-NEXT: and a0, a0, s1 -; RV64IM-NEXT: or a1, t5, a1 -; RV64IM-NEXT: slli a5, s9, 32 -; RV64IM-NEXT: add a5, s9, a5 -; RV64IM-NEXT: slli s0, t4, 32 -; RV64IM-NEXT: add t4, t4, s0 -; RV64IM-NEXT: slli s4, t2, 32 -; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: add t2, t2, s4 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a0, t0, a0 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, a5 -; RV64IM-NEXT: and a1, a1, a5 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t4 -; RV64IM-NEXT: and a1, a1, t4 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t2 -; RV64IM-NEXT: and a1, a1, t2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or t0, a1, a0 -; RV64IM-NEXT: andi a0, t0, 2 -; RV64IM-NEXT: andi a1, t0, 1 -; RV64IM-NEXT: andi a2, t0, 4 -; RV64IM-NEXT: andi a3, t0, 8 -; RV64IM-NEXT: andi a5, t0, 16 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 32 -; RV64IM-NEXT: mul a1, t0, a2 -; RV64IM-NEXT: mul a2, t0, a3 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a1, t0, 256 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a2, a0 -; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 512 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t4, a7, 39 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: and a1, t0, t1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 40 -; RV64IM-NEXT: and a1, t0, a4 -; RV64IM-NEXT: and a2, t0, s7 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 41 -; RV64IM-NEXT: and a2, t0, s8 -; RV64IM-NEXT: and a3, t0, s10 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 48 -; RV64IM-NEXT: and a3, t0, s11 -; RV64IM-NEXT: and a4, t0, ra -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: mul a4, t0, a4 -; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a3, a7, 49 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, 
a7, 56 -; RV64IM-NEXT: and a1, t0, a2 -; RV64IM-NEXT: and a2, t0, a3 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 57 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 42 -; RV64IM-NEXT: slli ra, a7, 43 -; RV64IM-NEXT: slli a3, a7, 44 -; RV64IM-NEXT: slli a4, a7, 45 -; RV64IM-NEXT: slli t5, a7, 46 -; RV64IM-NEXT: slli s0, a7, 47 -; RV64IM-NEXT: slli s1, a7, 50 -; RV64IM-NEXT: slli a0, a7, 51 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 52 -; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 53 -; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 54 -; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 55 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 58 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 59 -; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 60 -; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 61 -; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a7, a7, 62 -; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, t3 -; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s7, 1 -; RV64IM-NEXT: and a0, t0, s7 -; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s8, 8 -; RV64IM-NEXT: and a0, t0, s8 -; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s3 -; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s6, 32 -; RV64IM-NEXT: and a0, t0, s6 -; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s10, 64 -; RV64IM-NEXT: and a0, t0, s10 -; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s11, 512 -; RV64IM-NEXT: and a0, t0, s11 -; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s4, 1024 -; RV64IM-NEXT: and a0, t0, s4 -; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s5, 2048 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s9, 16384 -; RV64IM-NEXT: and a0, t0, s9 -; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui a5, 32768 -; RV64IM-NEXT: and a5, t0, a5 -; RV64IM-NEXT: lui a6, 65536 -; RV64IM-NEXT: and a6, t0, a6 -; RV64IM-NEXT: lui t1, 131072 -; RV64IM-NEXT: and t1, t0, t1 -; RV64IM-NEXT: lui t2, 262144 -; RV64IM-NEXT: and t2, t0, t2 -; RV64IM-NEXT: and a0, t0, t6 -; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s2 -; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, t4 -; RV64IM-NEXT: and a7, t0, a2 -; RV64IM-NEXT: and ra, t0, ra -; RV64IM-NEXT: and t3, t0, a3 -; RV64IM-NEXT: and t4, t0, a4 -; RV64IM-NEXT: and t5, t0, t5 -; 
RV64IM-NEXT: and t6, t0, s0 -; RV64IM-NEXT: and s0, t0, s1 -; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s1, t0, a2 -; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s2, t0, a2 -; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s3, t0, a2 -; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s4, t0, a2 -; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s5, t0, a2 -; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s6, t0, a2 -; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s7, t0, a2 -; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s8, t0, a2 -; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s9, t0, a2 -; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s10, t0, a2 -; RV64IM-NEXT: andi s11, t0, 64 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 128 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 1024 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul s11, t0, a2 -; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a4, t0, a2 -; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a3, t0, a2 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, a6 -; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t1 -; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t2 -; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srliw t2, t0, 31 -; RV64IM-NEXT: slli t2, t2, 31 -; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a5, t0, a5 -; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t1, t0, a6 -; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a6, t0, a6 -; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a1 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a7, t0, a7 -; RV64IM-NEXT: mul ra, t0, ra -; RV64IM-NEXT: mul a6, t0, t3 -; RV64IM-NEXT: mul t4, t0, 
t4 -; RV64IM-NEXT: mul t5, t0, t5 -; RV64IM-NEXT: mul a0, t0, t6 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul t6, t0, s0 -; RV64IM-NEXT: mul s0, t0, s1 -; RV64IM-NEXT: mul s1, t0, s2 -; RV64IM-NEXT: mul s2, t0, s3 -; RV64IM-NEXT: mul s3, t0, s4 -; RV64IM-NEXT: mul s4, t0, s5 -; RV64IM-NEXT: mul s5, t0, s6 -; RV64IM-NEXT: mul s6, t0, s7 -; RV64IM-NEXT: mul s7, t0, s8 -; RV64IM-NEXT: mul s8, t0, s9 -; RV64IM-NEXT: mul s9, t0, s10 -; RV64IM-NEXT: srli s10, t0, 63 -; RV64IM-NEXT: slli s10, s10, 63 -; RV64IM-NEXT: mul t2, t0, t2 -; RV64IM-NEXT: mul t0, t0, s10 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, a0, a1 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s11, t3, s11 -; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, t3, a4 -; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, t3, a3 -; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, t3, a2 -; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, t3, a7 -; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t6, t3, t6 -; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, t3, s5 -; RV64IM-NEXT: xor a0, s10, a0 -; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, s11, t3 -; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, a5 -; RV64IM-NEXT: xor a5, a7, ra -; RV64IM-NEXT: xor a7, t6, s0 -; RV64IM-NEXT: xor t6, s5, s6 -; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, t3 -; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, s10, t3 -; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: xor a6, a7, s1 -; RV64IM-NEXT: xor a7, t6, s7 -; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, s0, t1 -; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t3 -; RV64IM-NEXT: xor a5, a5, t4 -; RV64IM-NEXT: xor a6, a6, s2 -; RV64IM-NEXT: xor a7, a7, s8 -; RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: xor a1, a1, t1 -; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t1 -; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, t5 -; RV64IM-NEXT: xor a6, a6, s3 -; RV64IM-NEXT: xor a7, a7, s9 -; RV64IM-NEXT: xor a1, a1, a4 -; RV64IM-NEXT: xor a3, a3, t2 -; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a4 -; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a5, a4 
-; RV64IM-NEXT: xor a5, a6, s4 -; RV64IM-NEXT: slli a0, a0, 56 -; RV64IM-NEXT: xor a6, a7, t0 -; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a7, a1, t0 -; RV64IM-NEXT: xor a1, a1, a3 -; RV64IM-NEXT: slli a7, a7, 40 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: or a0, a0, a7 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a2, a1, a7 -; RV64IM-NEXT: xor a4, a1, a4 -; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: slli a2, a2, 24 -; RV64IM-NEXT: xor a5, a4, a5 -; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, a1, a3 -; RV64IM-NEXT: srli a4, a4, 24 -; RV64IM-NEXT: srliw a3, a5, 24 -; RV64IM-NEXT: and a4, a4, a7 -; RV64IM-NEXT: srli a7, a5, 40 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: slli a3, a3, 32 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: and a4, a7, t0 -; RV64IM-NEXT: srli a5, a5, 56 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a4, a4, a5 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: addi sp, sp, 448 -; RV64IM-NEXT: ret - %res = call i16 @llvm.clmulr.i16(i16 %a, i16 %b) - ret i16 %res -} - -define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { -; RV32IM-LABEL: clmulr_i32: -; RV32IM: # %bb.0: -; RV32IM-NEXT: addi sp, sp, -144 -; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill -; RV32IM-NEXT: srli a3, a0, 8 -; RV32IM-NEXT: lui s9, 16 -; RV32IM-NEXT: srli a4, a0, 24 -; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui a7, 61681 -; RV32IM-NEXT: lui ra, 209715 -; RV32IM-NEXT: lui a1, 349525 -; RV32IM-NEXT: li s0, 1 -; RV32IM-NEXT: lui t1, 1 -; 
RV32IM-NEXT: lui t2, 2 -; RV32IM-NEXT: lui t3, 4 -; RV32IM-NEXT: lui t4, 8 -; RV32IM-NEXT: lui t0, 32 -; RV32IM-NEXT: lui a6, 64 -; RV32IM-NEXT: lui a5, 128 -; RV32IM-NEXT: lui s1, 256 -; RV32IM-NEXT: lui t5, 512 -; RV32IM-NEXT: lui t6, 1024 -; RV32IM-NEXT: lui s4, 2048 -; RV32IM-NEXT: lui s2, 4096 -; RV32IM-NEXT: lui s3, 8192 -; RV32IM-NEXT: lui s7, 16384 -; RV32IM-NEXT: lui s5, 32768 -; RV32IM-NEXT: lui s6, 65536 -; RV32IM-NEXT: lui s11, 131072 -; RV32IM-NEXT: lui s8, 262144 -; RV32IM-NEXT: addi s10, s9, -256 -; RV32IM-NEXT: and a3, a3, s10 -; RV32IM-NEXT: or a3, a3, a4 -; RV32IM-NEXT: addi a7, a7, -241 -; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi a4, ra, 819 -; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi a1, a1, 1365 -; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: slli s0, s0, 11 -; RV32IM-NEXT: and a0, a0, s10 -; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: or a0, a0, a3 -; RV32IM-NEXT: srli a2, a0, 4 -; RV32IM-NEXT: and a0, a0, a7 -; RV32IM-NEXT: and a2, a2, a7 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a0, 2 -; RV32IM-NEXT: and a0, a0, a4 -; RV32IM-NEXT: and a2, a2, a4 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a0, 1 -; RV32IM-NEXT: and a0, a0, a1 -; RV32IM-NEXT: and a2, a2, a1 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a3, a2, a0 -; RV32IM-NEXT: andi a0, a3, 2 -; RV32IM-NEXT: andi a1, a3, 1 -; RV32IM-NEXT: and a4, a3, s0 -; RV32IM-NEXT: and a7, a3, t1 -; RV32IM-NEXT: and s0, a3, t2 -; RV32IM-NEXT: and ra, a3, t3 -; RV32IM-NEXT: and a2, a3, t4 -; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s9 -; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, t0 -; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a6, a3, a6 -; RV32IM-NEXT: and a5, a3, a5 -; RV32IM-NEXT: and s1, a3, s1 -; RV32IM-NEXT: sw s1, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, t5 -; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t6, a3, t6 -; RV32IM-NEXT: and a2, a3, s4 -; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s2, a3, s2 -; RV32IM-NEXT: and a2, a3, s3 -; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s7 -; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s5 -; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s6 -; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s11 -; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a3, s8 -; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a2, 524288 -; RV32IM-NEXT: and a2, a3, a2 -; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a3, a0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a3, a1 -; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a0, a3, 4 -; RV32IM-NEXT: mul a0, a3, a0 -; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a1, a3, 8 -; RV32IM-NEXT: mul a0, a3, a1 -; RV32IM-NEXT: sw a0, 0(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a2, a3, 16 -; RV32IM-NEXT: mul s9, a3, a2 -; RV32IM-NEXT: andi t0, a3, 32 -; RV32IM-NEXT: mul s6, a3, t0 -; RV32IM-NEXT: andi t1, a3, 64 -; RV32IM-NEXT: mul a0, a3, t1 -; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t2, a3, 128 -; 
RV32IM-NEXT: mul a0, a3, t2 -; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t2, a3, 256 -; RV32IM-NEXT: mul s1, a3, t2 -; RV32IM-NEXT: andi t3, a3, 512 -; RV32IM-NEXT: mul t5, a3, t3 -; RV32IM-NEXT: andi t4, a3, 1024 -; RV32IM-NEXT: mul s5, a3, t4 -; RV32IM-NEXT: mul s8, a3, a4 -; RV32IM-NEXT: mul a0, a3, a7 -; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul t2, a3, s0 -; RV32IM-NEXT: mul a7, a3, ra -; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s0, a3, a0 -; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s4, a3, a0 -; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s7, a3, a0 -; RV32IM-NEXT: mul a0, a3, a6 -; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a6, a3, a5 -; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a3, a0 -; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t1, a3, a0 -; RV32IM-NEXT: mul t4, a3, t6 -; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s3, a3, a0 -; RV32IM-NEXT: mul a2, a3, s2 -; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a3, a0 -; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a5, a3, a0 -; RV32IM-NEXT: lw a0, 36(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t0, a3, a0 -; RV32IM-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t3, a3, a0 -; RV32IM-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t6, a3, a0 -; RV32IM-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s2, a3, a0 -; RV32IM-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a3, a3, a0 -; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, s11, a0 -; RV32IM-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s11, s11, ra -; RV32IM-NEXT: xor s6, s9, s6 -; RV32IM-NEXT: xor t5, s1, t5 -; RV32IM-NEXT: xor a7, t2, a7 -; RV32IM-NEXT: xor a4, a6, a4 -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: xor a0, a0, s11 -; RV32IM-NEXT: lw a2, 4(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, s6, a2 -; RV32IM-NEXT: xor a6, t5, s5 -; RV32IM-NEXT: xor a7, a7, s0 -; RV32IM-NEXT: xor a4, a4, t1 -; RV32IM-NEXT: xor a1, a1, a5 -; RV32IM-NEXT: xor a0, a0, a2 -; RV32IM-NEXT: xor a2, a6, s8 -; RV32IM-NEXT: xor a5, a7, s4 -; RV32IM-NEXT: xor a4, a4, t4 -; RV32IM-NEXT: xor a1, a1, t0 -; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, a0, a6 -; RV32IM-NEXT: lw a6, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, a6 -; RV32IM-NEXT: xor a5, a5, s7 -; RV32IM-NEXT: xor a4, a4, s3 -; RV32IM-NEXT: xor a1, a1, t3 -; RV32IM-NEXT: lw a6, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a1, a1, t6 -; RV32IM-NEXT: xor a2, a0, a2 -; RV32IM-NEXT: xor a2, a2, a5 -; RV32IM-NEXT: slli a0, a0, 24 -; RV32IM-NEXT: xor a1, a1, s2 -; RV32IM-NEXT: xor a2, a2, a4 -; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: and a3, a2, s10 -; RV32IM-NEXT: srli a4, a2, 8 -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, s10 -; RV32IM-NEXT: srli a1, a1, 24 -; RV32IM-NEXT: or a0, a0, a3 -; RV32IM-NEXT: or a1, a2, a1 -; RV32IM-NEXT: or a0, a0, a1 -; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, 
a0, 4 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: lw a2, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload -; RV32IM-NEXT: addi sp, sp, 144 -; RV32IM-NEXT: ret -; -; RV64IM-LABEL: clmulr_i32: -; RV64IM: # %bb.0: -; RV64IM-NEXT: addi sp, sp, -448 -; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a2, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 -; RV64IM-NEXT: li a3, 255 -; RV64IM-NEXT: srli a5, a0, 40 -; RV64IM-NEXT: lui s3, 16 -; RV64IM-NEXT: srli s0, a0, 56 -; RV64IM-NEXT: srliw t2, a0, 24 -; RV64IM-NEXT: slli t0, a0, 56 -; RV64IM-NEXT: lui t3, 61681 -; RV64IM-NEXT: lui t4, 209715 -; RV64IM-NEXT: lui t6, 349525 -; RV64IM-NEXT: li a7, 1 -; RV64IM-NEXT: lui s5, 2 -; RV64IM-NEXT: lui t1, 4 -; RV64IM-NEXT: lui a4, 128 -; RV64IM-NEXT: lui s7, 256 -; RV64IM-NEXT: lui s8, 4096 -; RV64IM-NEXT: lui s10, 8192 -; RV64IM-NEXT: lui a1, 4080 -; RV64IM-NEXT: and a2, a2, a1 -; RV64IM-NEXT: slli a3, a3, 24 -; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill -; RV64IM-NEXT: addi s1, s3, -256 -; RV64IM-NEXT: and t5, a0, a1 -; RV64IM-NEXT: slli a1, t2, 32 -; RV64IM-NEXT: addi s9, t3, -241 -; RV64IM-NEXT: addi t4, t4, 819 -; RV64IM-NEXT: addi t2, t6, 1365 -; RV64IM-NEXT: slli t3, a7, 11 -; RV64IM-NEXT: slli s11, a7, 32 -; RV64IM-NEXT: slli ra, a7, 33 -; RV64IM-NEXT: slli t6, a7, 34 -; RV64IM-NEXT: slli s2, a7, 35 -; RV64IM-NEXT: slli s4, a7, 36 -; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a6, a3 -; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: slli a3, a7, 37 -; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a5, s1 -; RV64IM-NEXT: or a3, a3, s0 -; RV64IM-NEXT: slli a5, a7, 38 -; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t5, t5, 24 -; RV64IM-NEXT: and a0, a0, s1 -; RV64IM-NEXT: or a1, t5, a1 -; RV64IM-NEXT: 
slli a5, s9, 32 -; RV64IM-NEXT: add a5, s9, a5 -; RV64IM-NEXT: slli s0, t4, 32 -; RV64IM-NEXT: add t4, t4, s0 -; RV64IM-NEXT: slli s4, t2, 32 -; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: add t2, t2, s4 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a0, t0, a0 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, a5 -; RV64IM-NEXT: and a1, a1, a5 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t4 -; RV64IM-NEXT: and a1, a1, t4 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t2 -; RV64IM-NEXT: and a1, a1, t2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or t0, a1, a0 -; RV64IM-NEXT: andi a0, t0, 2 -; RV64IM-NEXT: andi a1, t0, 1 -; RV64IM-NEXT: andi a2, t0, 4 -; RV64IM-NEXT: andi a3, t0, 8 -; RV64IM-NEXT: andi a5, t0, 16 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 32 -; RV64IM-NEXT: mul a1, t0, a2 -; RV64IM-NEXT: mul a2, t0, a3 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a1, t0, 256 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a2, a0 -; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 512 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t4, a7, 39 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: and a1, t0, t1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 40 -; RV64IM-NEXT: and a1, t0, a4 -; RV64IM-NEXT: and a2, t0, s7 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 41 -; RV64IM-NEXT: and a2, t0, s8 -; RV64IM-NEXT: and a3, t0, s10 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 48 -; RV64IM-NEXT: and a3, t0, s11 -; RV64IM-NEXT: and a4, t0, ra -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: mul a4, t0, a4 -; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a3, a7, 49 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 56 -; RV64IM-NEXT: and a1, t0, a2 -; RV64IM-NEXT: and a2, t0, a3 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 57 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 42 -; RV64IM-NEXT: slli ra, a7, 43 -; RV64IM-NEXT: slli a3, 
a7, 44 -; RV64IM-NEXT: slli a4, a7, 45 -; RV64IM-NEXT: slli t5, a7, 46 -; RV64IM-NEXT: slli s0, a7, 47 -; RV64IM-NEXT: slli s1, a7, 50 -; RV64IM-NEXT: slli a0, a7, 51 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 52 -; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 53 -; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 54 -; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 55 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 58 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 59 -; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 60 -; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 61 -; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a7, a7, 62 -; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, t3 -; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s7, 1 -; RV64IM-NEXT: and a0, t0, s7 -; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s8, 8 -; RV64IM-NEXT: and a0, t0, s8 -; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s3 -; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s6, 32 -; RV64IM-NEXT: and a0, t0, s6 -; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s10, 64 -; RV64IM-NEXT: and a0, t0, s10 -; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s11, 512 -; RV64IM-NEXT: and a0, t0, s11 -; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s4, 1024 -; RV64IM-NEXT: and a0, t0, s4 -; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s5, 2048 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s9, 16384 -; RV64IM-NEXT: and a0, t0, s9 -; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui a5, 32768 -; RV64IM-NEXT: and a5, t0, a5 -; RV64IM-NEXT: lui a6, 65536 -; RV64IM-NEXT: and a6, t0, a6 -; RV64IM-NEXT: lui t1, 131072 -; RV64IM-NEXT: and t1, t0, t1 -; RV64IM-NEXT: lui t2, 262144 -; RV64IM-NEXT: and t2, t0, t2 -; RV64IM-NEXT: and a0, t0, t6 -; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s2 -; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, t4 -; RV64IM-NEXT: and a7, t0, a2 -; RV64IM-NEXT: and ra, t0, ra -; RV64IM-NEXT: and t3, t0, a3 -; RV64IM-NEXT: and t4, t0, a4 -; RV64IM-NEXT: and t5, t0, t5 -; RV64IM-NEXT: and t6, t0, s0 -; RV64IM-NEXT: and s0, t0, s1 -; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s1, t0, a2 -; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s2, t0, a2 -; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s3, t0, a2 -; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s4, t0, a2 -; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s5, t0, a2 -; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: 
and s6, t0, a2 -; RV64IM-NEXT: ld a2, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s7, t0, a2 -; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s8, t0, a2 -; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s9, t0, a2 -; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s10, t0, a2 -; RV64IM-NEXT: andi s11, t0, 64 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 128 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 1024 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul s11, t0, a2 -; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a4, t0, a2 -; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a3, t0, a2 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, a6 -; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t1 -; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t2 -; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srliw t2, t0, 31 -; RV64IM-NEXT: slli t2, t2, 31 -; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a5, t0, a5 -; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t1, t0, a6 -; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a6, t0, a6 -; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a1 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a7, t0, a7 -; RV64IM-NEXT: mul ra, t0, ra -; RV64IM-NEXT: mul a6, t0, t3 -; RV64IM-NEXT: mul t4, t0, t4 -; RV64IM-NEXT: mul t5, t0, t5 -; RV64IM-NEXT: mul a0, t0, t6 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul t6, t0, s0 -; RV64IM-NEXT: mul s0, t0, s1 -; RV64IM-NEXT: mul s1, t0, s2 -; RV64IM-NEXT: mul s2, t0, s3 -; RV64IM-NEXT: mul s3, t0, s4 -; RV64IM-NEXT: mul s4, t0, s5 -; RV64IM-NEXT: mul s5, t0, s6 -; RV64IM-NEXT: mul s6, t0, s7 -; RV64IM-NEXT: mul s7, t0, s8 -; RV64IM-NEXT: mul s8, t0, s9 -; RV64IM-NEXT: mul s9, t0, s10 -; RV64IM-NEXT: srli s10, t0, 63 -; RV64IM-NEXT: slli s10, s10, 63 -; RV64IM-NEXT: mul t2, t0, 
t2 -; RV64IM-NEXT: mul t0, t0, s10 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, a0, a1 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s11, t3, s11 -; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, t3, a4 -; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, t3, a3 -; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, t3, a2 -; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, t3, a7 -; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t6, t3, t6 -; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, t3, s5 -; RV64IM-NEXT: xor a0, s10, a0 -; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, s11, t3 -; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, a5 -; RV64IM-NEXT: xor a5, a7, ra -; RV64IM-NEXT: xor a7, t6, s0 -; RV64IM-NEXT: xor t6, s5, s6 -; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, t3 -; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, s10, t3 -; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: xor a6, a7, s1 -; RV64IM-NEXT: xor a7, t6, s7 -; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, s0, t1 -; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t3 -; RV64IM-NEXT: xor a5, a5, t4 -; RV64IM-NEXT: xor a6, a6, s2 -; RV64IM-NEXT: xor a7, a7, s8 -; RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: xor a1, a1, t1 -; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t1 -; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, t5 -; RV64IM-NEXT: xor a6, a6, s3 -; RV64IM-NEXT: xor a7, a7, s9 -; RV64IM-NEXT: xor a1, a1, a4 -; RV64IM-NEXT: xor a3, a3, t2 -; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a4 -; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a5, a4 -; RV64IM-NEXT: xor a5, a6, s4 -; RV64IM-NEXT: slli a0, a0, 56 -; RV64IM-NEXT: xor a6, a7, t0 -; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a7, a1, t0 -; RV64IM-NEXT: xor a1, a1, a3 -; RV64IM-NEXT: slli a7, a7, 40 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: or a0, a0, a7 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a2, a1, a7 -; RV64IM-NEXT: xor a4, a1, a4 -; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: slli a2, a2, 24 -; RV64IM-NEXT: xor a5, a4, a5 -; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload -; 
RV64IM-NEXT: and a1, a1, a3 -; RV64IM-NEXT: srli a4, a4, 24 -; RV64IM-NEXT: srliw a3, a5, 24 -; RV64IM-NEXT: and a4, a4, a7 -; RV64IM-NEXT: srli a7, a5, 40 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: slli a3, a3, 32 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: and a4, a7, t0 -; RV64IM-NEXT: srli a5, a5, 56 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a4, a4, a5 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: addi sp, sp, 448 -; RV64IM-NEXT: ret - %res = call i32 @llvm.clmulr.i32(i32 %a, i32 %b) - ret i32 %res -} - -define i64 @clmulr_i64(i64 %a, i64 %b) nounwind { -; RV32IM-LABEL: clmulr_i64: -; RV32IM: # %bb.0: -; RV32IM-NEXT: addi sp, sp, -512 -; RV32IM-NEXT: sw ra, 508(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s0, 504(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s1, 500(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s2, 496(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s3, 492(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s4, 488(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s5, 484(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s6, 480(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s7, 476(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s8, 472(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 468(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s10, 464(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s11, 460(sp) # 4-byte Folded Spill -; RV32IM-NEXT: srli t3, a0, 8 -; RV32IM-NEXT: lui s8, 16 -; RV32IM-NEXT: srli t4, a0, 24 -; RV32IM-NEXT: slli s2, a0, 24 -; RV32IM-NEXT: lui t5, 61681 -; RV32IM-NEXT: lui t6, 209715 -; RV32IM-NEXT: lui s0, 349525 -; RV32IM-NEXT: srli s4, a1, 8 -; RV32IM-NEXT: srli s1, a1, 24 -; RV32IM-NEXT: slli s3, a1, 24 -; RV32IM-NEXT: li s10, 1 -; RV32IM-NEXT: lui a3, 1 -; RV32IM-NEXT: lui a4, 2 -; RV32IM-NEXT: lui a5, 4 -; RV32IM-NEXT: lui a6, 8 -; RV32IM-NEXT: lui a7, 32 -; RV32IM-NEXT: lui t0, 64 -; RV32IM-NEXT: lui t1, 128 -; RV32IM-NEXT: lui t2, 256 -; RV32IM-NEXT: lui a2, 512 -; RV32IM-NEXT: addi s7, s8, -256 -; RV32IM-NEXT: sw s7, 396(sp) # 4-byte Folded Spill -; RV32IM-NEXT: addi s6, t5, -241 -; RV32IM-NEXT: addi s5, t6, 819 -; RV32IM-NEXT: addi t6, s0, 1365 -; RV32IM-NEXT: slli s10, s10, 11 -; RV32IM-NEXT: 
and t3, t3, s7 -; RV32IM-NEXT: and a0, a0, s7 -; RV32IM-NEXT: and t5, s4, s7 -; RV32IM-NEXT: and a1, a1, s7 -; RV32IM-NEXT: or t3, t3, t4 -; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or t4, t5, s1 -; RV32IM-NEXT: slli a1, a1, 8 -; RV32IM-NEXT: or a0, s2, a0 -; RV32IM-NEXT: or a1, s3, a1 -; RV32IM-NEXT: or a0, a0, t3 -; RV32IM-NEXT: or a1, a1, t4 -; RV32IM-NEXT: srli t3, a0, 4 -; RV32IM-NEXT: sw s6, 400(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a0, a0, s6 -; RV32IM-NEXT: srli t4, a1, 4 -; RV32IM-NEXT: and a1, a1, s6 -; RV32IM-NEXT: and t3, t3, s6 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: and t4, t4, s6 -; RV32IM-NEXT: slli a1, a1, 4 -; RV32IM-NEXT: or a0, t3, a0 -; RV32IM-NEXT: or a1, t4, a1 -; RV32IM-NEXT: srli t3, a0, 2 -; RV32IM-NEXT: sw s5, 404(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a0, a0, s5 -; RV32IM-NEXT: srli t4, a1, 2 -; RV32IM-NEXT: and a1, a1, s5 -; RV32IM-NEXT: and t3, t3, s5 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: and t4, t4, s5 -; RV32IM-NEXT: slli a1, a1, 2 -; RV32IM-NEXT: or a0, t3, a0 -; RV32IM-NEXT: or a1, t4, a1 -; RV32IM-NEXT: srli t3, a0, 1 -; RV32IM-NEXT: sw t6, 408(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a0, a0, t6 -; RV32IM-NEXT: srli t4, a1, 1 -; RV32IM-NEXT: and a1, a1, t6 -; RV32IM-NEXT: and t3, t3, t6 -; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: and t4, t4, t6 -; RV32IM-NEXT: slli a1, a1, 1 -; RV32IM-NEXT: or s2, t3, a0 -; RV32IM-NEXT: or a0, t4, a1 -; RV32IM-NEXT: and a1, a0, s10 -; RV32IM-NEXT: sw a1, 432(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, a3 -; RV32IM-NEXT: sw a1, 436(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, a4 -; RV32IM-NEXT: sw a1, 440(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, a5 -; RV32IM-NEXT: sw a1, 340(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, a6 -; RV32IM-NEXT: sw a1, 412(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, s8 -; RV32IM-NEXT: sw a1, 444(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, a7 -; RV32IM-NEXT: sw a1, 452(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and ra, a0, t0 -; RV32IM-NEXT: and a1, a0, t1 -; RV32IM-NEXT: sw a1, 344(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, t2 -; RV32IM-NEXT: sw a1, 448(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, a2 -; RV32IM-NEXT: sw a1, 456(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, s10 -; RV32IM-NEXT: sw a1, 384(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a3 -; RV32IM-NEXT: sw a1, 380(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a4 -; RV32IM-NEXT: sw a1, 376(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a5 -; RV32IM-NEXT: sw a1, 368(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a6 -; RV32IM-NEXT: sw a1, 348(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, s8 -; RV32IM-NEXT: sw a1, 336(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a7 -; RV32IM-NEXT: sw a1, 324(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, t0 -; RV32IM-NEXT: sw a1, 320(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, t1 -; RV32IM-NEXT: sw a1, 312(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, t2 -; RV32IM-NEXT: sw a1, 308(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a2 -; RV32IM-NEXT: sw a1, 300(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 1024 -; RV32IM-NEXT: and a2, a0, a1 -; RV32IM-NEXT: sw a2, 424(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 164(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 2048 -; RV32IM-NEXT: and a2, a0, a1 -; RV32IM-NEXT: sw a2, 428(sp) # 4-byte Folded Spill -; 
RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 136(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 4096 -; RV32IM-NEXT: and a2, a0, a1 -; RV32IM-NEXT: sw a2, 416(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 132(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 8192 -; RV32IM-NEXT: and s1, a0, a1 -; RV32IM-NEXT: sw s1, 108(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 128(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 16384 -; RV32IM-NEXT: and a2, a0, a1 -; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 32768 -; RV32IM-NEXT: and a2, a0, a1 -; RV32IM-NEXT: sw a2, 420(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 104(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 65536 -; RV32IM-NEXT: and t3, a0, a1 -; RV32IM-NEXT: sw t3, 116(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 100(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 131072 -; RV32IM-NEXT: and a2, a0, a1 -; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 262144 -; RV32IM-NEXT: and t2, a0, a1 -; RV32IM-NEXT: sw t2, 120(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui a1, 524288 -; RV32IM-NEXT: and t1, a0, a1 -; RV32IM-NEXT: sw t1, 124(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, s2, a1 -; RV32IM-NEXT: sw a1, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t0, a0, 4 -; RV32IM-NEXT: sw t0, 96(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t4, a0, 2 -; RV32IM-NEXT: andi a7, a0, 1 -; RV32IM-NEXT: sw a7, 92(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi t5, a0, 8 -; RV32IM-NEXT: andi a6, a0, 16 -; RV32IM-NEXT: sw a6, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a5, a0, 32 -; RV32IM-NEXT: sw a5, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a4, a0, 64 -; RV32IM-NEXT: sw a4, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a3, a0, 128 -; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi a2, a0, 256 -; RV32IM-NEXT: andi a1, a0, 512 -; RV32IM-NEXT: andi s11, a0, 1024 -; RV32IM-NEXT: andi s3, s2, 1 -; RV32IM-NEXT: andi s5, s2, 2 -; RV32IM-NEXT: andi s7, s2, 4 -; RV32IM-NEXT: andi t6, s2, 8 -; RV32IM-NEXT: andi s0, s2, 16 -; RV32IM-NEXT: sw s0, 392(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi s0, s2, 32 -; RV32IM-NEXT: andi s4, s2, 64 -; RV32IM-NEXT: andi s6, s2, 128 -; RV32IM-NEXT: andi s8, s2, 256 -; RV32IM-NEXT: andi s9, s2, 512 -; RV32IM-NEXT: andi s10, s2, 1024 -; RV32IM-NEXT: sw s10, 360(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, t0 -; RV32IM-NEXT: sw s10, 292(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, t4 -; RV32IM-NEXT: sw s10, 288(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, a7 -; RV32IM-NEXT: sw s10, 332(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, t5 -; RV32IM-NEXT: sw s10, 284(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, a6 -; RV32IM-NEXT: sw s10, 280(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, a5 -; RV32IM-NEXT: sw s10, 276(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, a4 -; RV32IM-NEXT: sw s10, 272(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, a3 -; RV32IM-NEXT: sw s10, 268(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, a2 -; RV32IM-NEXT: mv t0, a2 -; RV32IM-NEXT: sw 
s10, 264(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, a1 -; RV32IM-NEXT: mv a7, a1 -; RV32IM-NEXT: sw s10, 260(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s10, s2, s11 -; RV32IM-NEXT: mv a6, s11 -; RV32IM-NEXT: sw s10, 256(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s10, 432(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s10, s2, s10 -; RV32IM-NEXT: sw s10, 252(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s10, 436(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s10, s2, s10 -; RV32IM-NEXT: sw s10, 248(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s10, 440(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s10, s2, s10 -; RV32IM-NEXT: sw s10, 244(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s10, 340(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s11, s2, s10 -; RV32IM-NEXT: sw s11, 240(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s11, 412(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s11, s2, s11 -; RV32IM-NEXT: sw s11, 236(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s11, 444(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s11, s2, s11 -; RV32IM-NEXT: sw s11, 232(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s11, 452(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s11, s2, s11 -; RV32IM-NEXT: sw s11, 228(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s11, s2, ra -; RV32IM-NEXT: sw s11, 224(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mv a5, ra -; RV32IM-NEXT: lw s11, 344(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul ra, s2, s11 -; RV32IM-NEXT: sw ra, 220(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw ra, 448(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul ra, s2, ra -; RV32IM-NEXT: sw ra, 216(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw ra, 456(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul ra, s2, ra -; RV32IM-NEXT: sw ra, 212(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw ra, 424(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul ra, s2, ra -; RV32IM-NEXT: sw ra, 208(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw ra, 428(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul ra, s2, ra -; RV32IM-NEXT: sw ra, 204(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw ra, 416(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul ra, s2, ra -; RV32IM-NEXT: sw ra, 200(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul ra, s2, s1 -; RV32IM-NEXT: sw ra, 196(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw ra, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s1, s2, ra -; RV32IM-NEXT: sw s1, 192(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s1, 420(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s1, s2, s1 -; RV32IM-NEXT: sw s1, 188(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s1, s2, t3 -; RV32IM-NEXT: sw s1, 184(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s1, 16(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, s2, s1 -; RV32IM-NEXT: sw a4, 180(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a4, s2, t2 -; RV32IM-NEXT: sw a4, 176(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a3, s2, t1 -; RV32IM-NEXT: sw a3, 172(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s2, a0, s3 -; RV32IM-NEXT: sw s2, 352(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s2, a0, s5 -; RV32IM-NEXT: sw s2, 364(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s2, a0, s7 -; RV32IM-NEXT: sw s2, 372(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a4, a0, t6 -; RV32IM-NEXT: sw a4, 388(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 392(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a1 -; RV32IM-NEXT: sw a4, 392(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a2, a0, s0 -; RV32IM-NEXT: sw a2, 160(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a2, a0, s4 -; RV32IM-NEXT: sw 
a2, 156(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a2, a0, s6 -; RV32IM-NEXT: sw a2, 304(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a2, a0, s8 -; RV32IM-NEXT: sw a2, 152(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a2, a0, s9 -; RV32IM-NEXT: sw a2, 148(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 360(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a1 -; RV32IM-NEXT: sw a2, 296(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 384(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a2 -; RV32IM-NEXT: sw a2, 316(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 380(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a2 -; RV32IM-NEXT: sw a4, 328(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 376(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a2 -; RV32IM-NEXT: sw a4, 356(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 368(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a2 -; RV32IM-NEXT: sw a4, 360(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 348(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a2 -; RV32IM-NEXT: sw a4, 368(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 336(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a2 -; RV32IM-NEXT: sw a4, 376(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 324(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a2 -; RV32IM-NEXT: sw a4, 380(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 320(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a2 -; RV32IM-NEXT: sw a4, 384(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 312(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a2 -; RV32IM-NEXT: sw a2, 144(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 308(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a2 -; RV32IM-NEXT: sw a2, 140(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 300(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a2 -; RV32IM-NEXT: sw a2, 168(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 164(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a2 -; RV32IM-NEXT: sw a2, 308(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a2, 136(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a2 -; RV32IM-NEXT: sw a2, 320(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 132(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a1 -; RV32IM-NEXT: sw a2, 132(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 128(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a1 -; RV32IM-NEXT: sw a2, 128(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 112(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a1 -; RV32IM-NEXT: sw a2, 164(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 104(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a1 -; RV32IM-NEXT: sw a2, 300(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 100(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a0, a1 -; RV32IM-NEXT: sw a2, 312(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a1 -; RV32IM-NEXT: sw a4, 324(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a1 -; RV32IM-NEXT: sw a4, 336(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a0, a1 -; RV32IM-NEXT: sw a4, 348(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu t6, a0, t4 -; RV32IM-NEXT: mul a1, a0, t4 -; RV32IM-NEXT: sw a1, 104(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 92(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 100(sp) # 
4-byte Folded Spill -; RV32IM-NEXT: lw a1, 96(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu t2, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 96(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu t3, a0, t5 -; RV32IM-NEXT: mul a1, a0, t5 -; RV32IM-NEXT: sw a1, 92(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu t4, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu s0, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu s2, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 112(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu s3, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 136(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu a2, a0, t0 -; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, t0 -; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu a2, a0, a7 -; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a7 -; RV32IM-NEXT: sw a1, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu a2, a0, a6 -; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a6 -; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 432(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a2, a0, a1 -; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 432(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 436(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a2, a0, a1 -; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 436(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 440(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a2, a0, a1 -; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu a2, a0, s10 -; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, s10 -; RV32IM-NEXT: sw a1, 340(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 412(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a2, a0, a1 -; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 412(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 444(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a2, a0, a1 -; RV32IM-NEXT: sw a2, 20(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 440(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 452(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a2, a0, a1 -; RV32IM-NEXT: sw a2, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 444(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu a2, a0, a5 -; RV32IM-NEXT: sw a2, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a0, a5 -; RV32IM-NEXT: sw a1, 452(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mv a1, s11 -; RV32IM-NEXT: mulhu s11, a0, s11 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 448(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a5, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 456(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu 
s10, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 344(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 424(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu s8, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 424(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 428(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu s9, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 456(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 416(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a7, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 108(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a3, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 0(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu a2, a0, ra -; RV32IM-NEXT: mul a1, a0, ra -; RV32IM-NEXT: sw a1, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 420(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu t5, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 108(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 116(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu t0, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 416(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mulhu a6, a0, s1 -; RV32IM-NEXT: mul a1, a0, s1 -; RV32IM-NEXT: sw a1, 420(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a1, 120(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a4, a0, a1 -; RV32IM-NEXT: mul a1, a0, a1 -; RV32IM-NEXT: sw a1, 428(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw t1, 124(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mulhu a1, a0, t1 -; RV32IM-NEXT: mul a0, a0, t1 -; RV32IM-NEXT: sw a0, 448(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 292(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t2, a0 -; RV32IM-NEXT: sw a0, 116(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 288(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s7, t6, a0 -; RV32IM-NEXT: lw a0, 284(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s5, t3, a0 -; RV32IM-NEXT: lw a0, 280(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s6, t4, a0 -; RV32IM-NEXT: lw a0, 276(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s4, s0, a0 -; RV32IM-NEXT: lw a0, 272(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, s2, a0 -; RV32IM-NEXT: sw a0, 124(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 268(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s3, s3, a0 -; RV32IM-NEXT: lw a0, 264(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s2, t1, a0 -; RV32IM-NEXT: lw a0, 260(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t1, a0 -; RV32IM-NEXT: sw a0, 120(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 256(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t1, a0 -; RV32IM-NEXT: sw a0, 272(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 252(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s0, s0, a0 -; RV32IM-NEXT: lw a0, 248(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 36(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or t6, t1, a0 -; RV32IM-NEXT: lw a0, 244(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t1, a0 -; RV32IM-NEXT: sw a0, 252(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 240(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t1, a0 -; RV32IM-NEXT: sw a0, 264(sp) # 4-byte Folded 
Spill -; RV32IM-NEXT: lw a0, 236(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t1, a0 -; RV32IM-NEXT: sw a0, 284(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 232(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or t4, t1, a0 -; RV32IM-NEXT: lw a0, 228(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or t3, t1, a0 -; RV32IM-NEXT: lw a0, 224(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw t1, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t1, a0 -; RV32IM-NEXT: sw a0, 248(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 220(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, s11, a0 -; RV32IM-NEXT: sw a0, 260(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 216(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, a5, a0 -; RV32IM-NEXT: sw a0, 276(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 212(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, s10, a0 -; RV32IM-NEXT: sw a0, 288(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 208(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s8, s8, a0 -; RV32IM-NEXT: lw a0, 204(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s10, s9, a0 -; RV32IM-NEXT: lw a0, 200(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s11, a7, a0 -; RV32IM-NEXT: lw a0, 196(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, a3, a0 -; RV32IM-NEXT: sw a0, 256(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 192(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: sw a0, 268(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 188(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t5, a0 -; RV32IM-NEXT: sw a0, 280(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 184(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or a0, t0, a0 -; RV32IM-NEXT: sw a0, 292(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 180(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or t2, a6, a0 -; RV32IM-NEXT: lw a0, 176(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or t1, a4, a0 -; RV32IM-NEXT: lw s1, 172(sp) # 4-byte Folded Reload -; RV32IM-NEXT: or s1, a1, s1 -; RV32IM-NEXT: lw a0, 160(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a1, 156(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t0, a0, a1 -; RV32IM-NEXT: lw a0, 152(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a1, 148(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t5, a0, a1 -; RV32IM-NEXT: lw a0, 144(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a1, 140(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a6, a0, a1 -; RV32IM-NEXT: lw a0, 132(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a1, 128(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s9, a0, a1 -; RV32IM-NEXT: lw a0, 104(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a1, 100(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a7, a1, a0 -; RV32IM-NEXT: lw a0, 96(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a1, 92(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, a0, a1 -; RV32IM-NEXT: lw a1, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a2, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a1, a1, a2 -; RV32IM-NEXT: lw a2, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a3, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, a3 -; RV32IM-NEXT: lw a3, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a4, 340(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a3, a4 -; RV32IM-NEXT: lw a4, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw a5, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, a5 -; RV32IM-NEXT: lw a5, 4(sp) # 4-byte Folded 
Reload -; RV32IM-NEXT: lw ra, 0(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, ra -; RV32IM-NEXT: lw ra, 332(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s7, ra, s7 -; RV32IM-NEXT: lw ra, 116(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s5, ra, s5 -; RV32IM-NEXT: xor s4, s6, s4 -; RV32IM-NEXT: xor s2, s3, s2 -; RV32IM-NEXT: xor t6, s0, t6 -; RV32IM-NEXT: xor t3, t4, t3 -; RV32IM-NEXT: xor t4, s8, s10 -; RV32IM-NEXT: xor t1, t2, t1 -; RV32IM-NEXT: lw t2, 304(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t0, t0, t2 -; RV32IM-NEXT: lw t2, 296(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t2, t5, t2 -; RV32IM-NEXT: lw t5, 168(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a6, a6, t5 -; RV32IM-NEXT: lw t5, 164(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t5, s9, t5 -; RV32IM-NEXT: xor a0, a7, a0 -; RV32IM-NEXT: lw a7, 112(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a1, a1, a7 -; RV32IM-NEXT: lw a7, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, a7 -; RV32IM-NEXT: lw a7, 412(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a3, a7 -; RV32IM-NEXT: lw a7, 344(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, a7 -; RV32IM-NEXT: lw a7, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a7 -; RV32IM-NEXT: xor a7, s7, s5 -; RV32IM-NEXT: lw s0, 124(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s0, s4, s0 -; RV32IM-NEXT: lw s3, 120(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor s2, s2, s3 -; RV32IM-NEXT: lw s3, 252(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t6, t6, s3 -; RV32IM-NEXT: lw s3, 248(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t3, t3, s3 -; RV32IM-NEXT: xor t4, t4, s11 -; RV32IM-NEXT: xor t1, t1, s1 -; RV32IM-NEXT: lw s1, 316(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t2, t2, s1 -; RV32IM-NEXT: lw s1, 308(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a6, a6, s1 -; RV32IM-NEXT: lw s1, 300(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t5, t5, s1 -; RV32IM-NEXT: xor a0, a0, a1 -; RV32IM-NEXT: lw a1, 432(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a1, a2, a1 -; RV32IM-NEXT: lw a2, 440(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a3, a2 -; RV32IM-NEXT: lw a3, 424(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a4, a3 -; RV32IM-NEXT: lw a4, 108(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a5, a4 -; RV32IM-NEXT: xor a5, a7, s0 -; RV32IM-NEXT: lw a7, 272(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a7, s2, a7 -; RV32IM-NEXT: lw s0, 264(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t6, t6, s0 -; RV32IM-NEXT: lw s0, 260(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t3, t3, s0 -; RV32IM-NEXT: lw s0, 256(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t4, t4, s0 -; RV32IM-NEXT: lw s0, 352(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t1, t1, s0 -; RV32IM-NEXT: lw s0, 328(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t2, t2, s0 -; RV32IM-NEXT: lw s0, 320(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a6, a6, s0 -; RV32IM-NEXT: lw s0, 312(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t5, t5, s0 -; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, a0, s0 -; RV32IM-NEXT: lw s0, 436(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a1, a1, s0 -; RV32IM-NEXT: lw s0, 444(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, s0 -; RV32IM-NEXT: lw s0, 456(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a3, s0 -; RV32IM-NEXT: lw s0, 416(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, s0 -; RV32IM-NEXT: xor a5, a5, a7 -; RV32IM-NEXT: lw a7, 284(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a7, t6, a7 
-; RV32IM-NEXT: lw t6, 276(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t3, t3, t6 -; RV32IM-NEXT: lw t6, 268(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t4, t4, t6 -; RV32IM-NEXT: lw t6, 364(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t1, t1, t6 -; RV32IM-NEXT: lw t6, 356(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t2, t2, t6 -; RV32IM-NEXT: lw t6, 324(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t5, t5, t6 -; RV32IM-NEXT: lw t6, 452(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, t6 -; RV32IM-NEXT: lw t6, 420(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, t6 -; RV32IM-NEXT: xor a5, a5, a7 -; RV32IM-NEXT: lw a7, 288(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a7, t3, a7 -; RV32IM-NEXT: lw t3, 280(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t3, t4, t3 -; RV32IM-NEXT: lw t4, 372(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t1, t1, t4 -; RV32IM-NEXT: lw t4, 360(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t2, t2, t4 -; RV32IM-NEXT: lw t4, 336(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t4, t5, t4 -; RV32IM-NEXT: xor a1, a0, a1 -; RV32IM-NEXT: xor a1, a1, a2 -; RV32IM-NEXT: lw a2, 428(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a4, a2 -; RV32IM-NEXT: xor a4, a5, a7 -; RV32IM-NEXT: lw a5, 292(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, t3, a5 -; RV32IM-NEXT: lw a7, 388(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a7, t1, a7 -; RV32IM-NEXT: lw t1, 368(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t1, t2, t1 -; RV32IM-NEXT: lw t2, 348(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t2, t4, t2 -; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: lw a3, 448(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a2, a2, a3 -; RV32IM-NEXT: xor a4, a4, a5 -; RV32IM-NEXT: lw a3, 392(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a7, a3 -; RV32IM-NEXT: lw a5, 376(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, t1, a5 -; RV32IM-NEXT: xor a3, a4, a3 -; RV32IM-NEXT: lw a4, 380(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a5, a4 -; RV32IM-NEXT: xor a3, a3, t0 -; RV32IM-NEXT: slli a0, a0, 24 -; RV32IM-NEXT: lw a5, 384(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, a5 -; RV32IM-NEXT: lw a7, 396(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a5, a1, a7 -; RV32IM-NEXT: slli a5, a5, 8 -; RV32IM-NEXT: or a0, a0, a5 -; RV32IM-NEXT: xor a2, a1, a2 -; RV32IM-NEXT: srli a1, a1, 8 -; RV32IM-NEXT: and a1, a1, a7 -; RV32IM-NEXT: srli a2, a2, 24 -; RV32IM-NEXT: or a1, a1, a2 -; RV32IM-NEXT: or a0, a0, a1 -; RV32IM-NEXT: xor a4, a3, a4 -; RV32IM-NEXT: xor a1, a4, a6 -; RV32IM-NEXT: and a2, a1, a7 -; RV32IM-NEXT: xor a4, a1, t2 -; RV32IM-NEXT: srli a1, a1, 8 -; RV32IM-NEXT: and a1, a1, a7 -; RV32IM-NEXT: srli a5, a0, 4 -; RV32IM-NEXT: lw a6, 400(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a6 -; RV32IM-NEXT: and a5, a5, a6 -; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a5, a0 -; RV32IM-NEXT: slli a3, a3, 24 -; RV32IM-NEXT: slli a2, a2, 8 -; RV32IM-NEXT: or a2, a3, a2 -; RV32IM-NEXT: srli a4, a4, 24 -; RV32IM-NEXT: or a1, a1, a4 -; RV32IM-NEXT: or a1, a2, a1 -; RV32IM-NEXT: srli a2, a0, 2 -; RV32IM-NEXT: lw a3, 404(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a3 -; RV32IM-NEXT: and a2, a2, a3 -; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: srli a2, a1, 4 -; RV32IM-NEXT: and a1, a1, a6 -; RV32IM-NEXT: and a2, a2, a6 -; RV32IM-NEXT: slli a1, a1, 4 -; RV32IM-NEXT: or a1, a2, a1 -; RV32IM-NEXT: srli a2, a1, 2 -; RV32IM-NEXT: and a1, a1, a3 -; RV32IM-NEXT: and a2, a2, a3 -; RV32IM-NEXT: srli a3, a0, 1 -; 
RV32IM-NEXT: lw a5, 408(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a4, a0, a5 -; RV32IM-NEXT: and a3, a3, a5 -; RV32IM-NEXT: slli a1, a1, 2 -; RV32IM-NEXT: or a1, a2, a1 -; RV32IM-NEXT: srli a0, a1, 1 -; RV32IM-NEXT: and a1, a1, a5 -; RV32IM-NEXT: and a0, a0, a5 -; RV32IM-NEXT: slli a1, a1, 1 -; RV32IM-NEXT: or a0, a0, a1 -; RV32IM-NEXT: slli a1, a4, 1 -; RV32IM-NEXT: or a1, a3, a1 -; RV32IM-NEXT: lw ra, 508(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s0, 504(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s1, 500(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s2, 496(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s3, 492(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s4, 488(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s5, 484(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s6, 480(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s7, 476(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s8, 472(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 468(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s10, 464(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s11, 460(sp) # 4-byte Folded Reload -; RV32IM-NEXT: addi sp, sp, 512 -; RV32IM-NEXT: ret -; -; RV64IM-LABEL: clmulr_i64: -; RV64IM: # %bb.0: -; RV64IM-NEXT: addi sp, sp, -448 -; RV64IM-NEXT: sd ra, 440(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s0, 432(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 424(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s2, 416(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s3, 408(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s4, 400(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s5, 392(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s6, 384(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s7, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s8, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s9, 360(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s11, 344(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a2, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 -; RV64IM-NEXT: li a3, 255 -; RV64IM-NEXT: srli a5, a0, 40 -; RV64IM-NEXT: lui s3, 16 -; RV64IM-NEXT: srli s0, a0, 56 -; RV64IM-NEXT: srliw t2, a0, 24 -; RV64IM-NEXT: slli t0, a0, 56 -; RV64IM-NEXT: lui t3, 61681 -; RV64IM-NEXT: lui t4, 209715 -; RV64IM-NEXT: lui t6, 349525 -; RV64IM-NEXT: li a7, 1 -; RV64IM-NEXT: lui s5, 2 -; RV64IM-NEXT: lui t1, 4 -; RV64IM-NEXT: lui a4, 128 -; RV64IM-NEXT: lui s7, 256 -; RV64IM-NEXT: lui s8, 4096 -; RV64IM-NEXT: lui s10, 8192 -; RV64IM-NEXT: lui a1, 4080 -; RV64IM-NEXT: and a2, a2, a1 -; RV64IM-NEXT: slli a3, a3, 24 -; RV64IM-NEXT: sd a3, 336(sp) # 8-byte Folded Spill -; RV64IM-NEXT: addi s1, s3, -256 -; RV64IM-NEXT: and t5, a0, a1 -; RV64IM-NEXT: slli a1, t2, 32 -; RV64IM-NEXT: addi s9, t3, -241 -; RV64IM-NEXT: addi t4, t4, 819 -; RV64IM-NEXT: addi t2, t6, 1365 -; RV64IM-NEXT: slli t3, a7, 11 -; RV64IM-NEXT: slli s11, a7, 32 -; RV64IM-NEXT: slli ra, a7, 33 -; RV64IM-NEXT: slli t6, a7, 34 -; RV64IM-NEXT: slli s2, a7, 35 -; RV64IM-NEXT: slli s4, a7, 36 -; RV64IM-NEXT: sd s4, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a6, a3 -; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: slli a3, a7, 37 -; RV64IM-NEXT: sd a3, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s1, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a3, a5, s1 -; RV64IM-NEXT: or a3, a3, s0 -; RV64IM-NEXT: slli a5, a7, 38 -; RV64IM-NEXT: sd a5, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t5, t5, 24 -; RV64IM-NEXT: and a0, a0, s1 -; RV64IM-NEXT: or a1, t5, a1 -; RV64IM-NEXT: slli a5, s9, 32 -; RV64IM-NEXT: add a5, s9, a5 -; 
RV64IM-NEXT: slli s0, t4, 32 -; RV64IM-NEXT: add t4, t4, s0 -; RV64IM-NEXT: slli s4, t2, 32 -; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: add t2, t2, s4 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a0, t0, a0 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: sd a5, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, a5 -; RV64IM-NEXT: and a1, a1, a5 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: sd t4, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t4 -; RV64IM-NEXT: and a1, a1, t4 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: sd t2, 328(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, t2 -; RV64IM-NEXT: and a1, a1, t2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or t0, a1, a0 -; RV64IM-NEXT: andi a0, t0, 2 -; RV64IM-NEXT: andi a1, t0, 1 -; RV64IM-NEXT: andi a2, t0, 4 -; RV64IM-NEXT: andi a3, t0, 8 -; RV64IM-NEXT: andi a5, t0, 16 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 32 -; RV64IM-NEXT: mul a1, t0, a2 -; RV64IM-NEXT: mul a2, t0, a3 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a1, t0, 256 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a2, a0 -; RV64IM-NEXT: sd a0, 280(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi a0, t0, 512 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: sd a0, 272(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli t4, a7, 39 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: and a1, t0, t1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 264(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 40 -; RV64IM-NEXT: and a1, t0, a4 -; RV64IM-NEXT: and a2, t0, s7 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 41 -; RV64IM-NEXT: and a2, t0, s8 -; RV64IM-NEXT: and a3, t0, s10 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 224(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 48 -; RV64IM-NEXT: and a3, t0, s11 -; RV64IM-NEXT: and a4, t0, ra -; RV64IM-NEXT: mul a3, t0, a3 -; RV64IM-NEXT: mul a4, t0, a4 -; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 216(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a3, a7, 49 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 208(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 56 -; RV64IM-NEXT: and a1, t0, a2 -; RV64IM-NEXT: and a2, t0, a3 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a1, a7, 57 -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, a1 -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: mul a1, t0, a1 -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: sd a0, 192(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a2, a7, 42 -; RV64IM-NEXT: slli ra, a7, 43 -; RV64IM-NEXT: slli a3, a7, 44 -; RV64IM-NEXT: slli a4, a7, 45 -; 
RV64IM-NEXT: slli t5, a7, 46 -; RV64IM-NEXT: slli s0, a7, 47 -; RV64IM-NEXT: slli s1, a7, 50 -; RV64IM-NEXT: slli a0, a7, 51 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 52 -; RV64IM-NEXT: sd a0, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 53 -; RV64IM-NEXT: sd a0, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 54 -; RV64IM-NEXT: sd a0, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 55 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 58 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 59 -; RV64IM-NEXT: sd a0, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 60 -; RV64IM-NEXT: sd a0, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a7, 61 -; RV64IM-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a7, a7, 62 -; RV64IM-NEXT: sd a7, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, t3 -; RV64IM-NEXT: sd a0, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s7, 1 -; RV64IM-NEXT: and a0, t0, s7 -; RV64IM-NEXT: sd a0, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s8, 8 -; RV64IM-NEXT: and a0, t0, s8 -; RV64IM-NEXT: sd a0, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s3 -; RV64IM-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s6, 32 -; RV64IM-NEXT: and a0, t0, s6 -; RV64IM-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s10, 64 -; RV64IM-NEXT: and a0, t0, s10 -; RV64IM-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s11, 512 -; RV64IM-NEXT: and a0, t0, s11 -; RV64IM-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s4, 1024 -; RV64IM-NEXT: and a0, t0, s4 -; RV64IM-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s5, 2048 -; RV64IM-NEXT: and a0, t0, s5 -; RV64IM-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui s9, 16384 -; RV64IM-NEXT: and a0, t0, s9 -; RV64IM-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui a5, 32768 -; RV64IM-NEXT: and a5, t0, a5 -; RV64IM-NEXT: lui a6, 65536 -; RV64IM-NEXT: and a6, t0, a6 -; RV64IM-NEXT: lui t1, 131072 -; RV64IM-NEXT: and t1, t0, t1 -; RV64IM-NEXT: lui t2, 262144 -; RV64IM-NEXT: and t2, t0, t2 -; RV64IM-NEXT: and a0, t0, t6 -; RV64IM-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, t0, s2 -; RV64IM-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: sd a0, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, t0, a0 -; RV64IM-NEXT: and a1, t0, t4 -; RV64IM-NEXT: and a7, t0, a2 -; RV64IM-NEXT: and ra, t0, ra -; RV64IM-NEXT: and t3, t0, a3 -; RV64IM-NEXT: and t4, t0, a4 -; RV64IM-NEXT: and t5, t0, t5 -; RV64IM-NEXT: and t6, t0, s0 -; RV64IM-NEXT: and s0, t0, s1 -; RV64IM-NEXT: ld a2, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s1, t0, a2 -; RV64IM-NEXT: ld a2, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s2, t0, a2 -; RV64IM-NEXT: ld a2, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s3, t0, a2 -; RV64IM-NEXT: ld a2, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s4, t0, a2 -; RV64IM-NEXT: ld a2, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s5, t0, a2 -; RV64IM-NEXT: ld a2, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s6, t0, a2 -; RV64IM-NEXT: ld a2, 
136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s7, t0, a2 -; RV64IM-NEXT: ld a2, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s8, t0, a2 -; RV64IM-NEXT: ld a2, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s9, t0, a2 -; RV64IM-NEXT: ld a2, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s10, t0, a2 -; RV64IM-NEXT: andi s11, t0, 64 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 128 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: andi s11, t0, 1024 -; RV64IM-NEXT: mul a2, t0, s11 -; RV64IM-NEXT: sd a2, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 120(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul s11, t0, a2 -; RV64IM-NEXT: ld a2, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 104(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 72(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 64(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a4, t0, a2 -; RV64IM-NEXT: ld a2, 56(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 96(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 40(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: sd a2, 136(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a2, 32(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a3, t0, a2 -; RV64IM-NEXT: mul a2, t0, a5 -; RV64IM-NEXT: sd a2, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, a6 -; RV64IM-NEXT: sd a2, 128(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t1 -; RV64IM-NEXT: sd a2, 160(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a2, t0, t2 -; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srliw t2, t0, 31 -; RV64IM-NEXT: slli t2, t2, 31 -; RV64IM-NEXT: ld a2, 24(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, t0, a2 -; RV64IM-NEXT: ld a5, 16(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a5, t0, a5 -; RV64IM-NEXT: ld a6, 8(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t1, t0, a6 -; RV64IM-NEXT: ld a6, 0(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a6, t0, a6 -; RV64IM-NEXT: sd a6, 112(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a0 -; RV64IM-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a0, t0, a1 -; RV64IM-NEXT: sd a0, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul a7, t0, a7 -; RV64IM-NEXT: mul ra, t0, ra -; RV64IM-NEXT: mul a6, t0, t3 -; RV64IM-NEXT: mul t4, t0, t4 -; RV64IM-NEXT: mul t5, t0, t5 -; RV64IM-NEXT: mul a0, t0, t6 -; RV64IM-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul t6, t0, s0 -; RV64IM-NEXT: mul s0, t0, s1 -; RV64IM-NEXT: mul s1, t0, s2 -; RV64IM-NEXT: mul s2, t0, s3 -; RV64IM-NEXT: mul s3, t0, s4 -; RV64IM-NEXT: mul s4, t0, s5 -; RV64IM-NEXT: mul s5, t0, s6 -; RV64IM-NEXT: mul s6, t0, s7 -; RV64IM-NEXT: mul s7, t0, s8 -; RV64IM-NEXT: mul s8, t0, s9 -; RV64IM-NEXT: mul s9, t0, s10 -; RV64IM-NEXT: srli s10, t0, 63 -; RV64IM-NEXT: slli s10, s10, 63 -; RV64IM-NEXT: mul t2, t0, t2 -; RV64IM-NEXT: mul t0, t0, s10 -; 
RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, a0, a1 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, a1 -; RV64IM-NEXT: ld a1, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld t3, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s11, t3, s11 -; RV64IM-NEXT: ld t3, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, t3, a4 -; RV64IM-NEXT: ld t3, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, t3, a3 -; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, t3, a2 -; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, t3, a7 -; RV64IM-NEXT: ld t3, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t6, t3, t6 -; RV64IM-NEXT: ld t3, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, t3, s5 -; RV64IM-NEXT: xor a0, s10, a0 -; RV64IM-NEXT: ld t3, 120(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s10, s11, t3 -; RV64IM-NEXT: ld t3, 96(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, a5 -; RV64IM-NEXT: xor a5, a7, ra -; RV64IM-NEXT: xor a7, t6, s0 -; RV64IM-NEXT: xor t6, s5, s6 -; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a0, a0, t3 -; RV64IM-NEXT: ld t3, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, t3 -; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, s10, t3 -; RV64IM-NEXT: ld t3, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a4, t3 -; RV64IM-NEXT: ld t3, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: xor a6, a7, s1 -; RV64IM-NEXT: xor a7, t6, s7 -; RV64IM-NEXT: ld t1, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, s0, t1 -; RV64IM-NEXT: ld t3, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t3 -; RV64IM-NEXT: ld t3, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t3 -; RV64IM-NEXT: xor a5, a5, t4 -; RV64IM-NEXT: xor a6, a6, s2 -; RV64IM-NEXT: xor a7, a7, s8 -; RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: xor a1, a1, t1 -; RV64IM-NEXT: ld t1, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t1 -; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: xor a5, a5, t5 -; RV64IM-NEXT: xor a6, a6, s3 -; RV64IM-NEXT: xor a7, a7, s9 -; RV64IM-NEXT: xor a1, a1, a4 -; RV64IM-NEXT: xor a3, a3, t2 -; RV64IM-NEXT: ld a4, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a4 -; RV64IM-NEXT: ld a4, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a4, a5, a4 -; RV64IM-NEXT: xor a5, a6, s4 -; RV64IM-NEXT: slli a0, a0, 56 -; RV64IM-NEXT: xor a6, a7, t0 -; RV64IM-NEXT: ld t0, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a7, a1, t0 -; RV64IM-NEXT: xor a1, a1, a3 -; RV64IM-NEXT: slli a7, a7, 40 -; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: or a0, a0, a7 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a2, a1, a7 -; RV64IM-NEXT: xor a4, a1, a4 -; RV64IM-NEXT: srli a1, a1, 8 -; RV64IM-NEXT: slli a2, a2, 24 -; RV64IM-NEXT: xor a5, a4, a5 -; RV64IM-NEXT: ld a3, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, a1, a3 -; RV64IM-NEXT: srli 
a4, a4, 24 -; RV64IM-NEXT: srliw a3, a5, 24 -; RV64IM-NEXT: and a4, a4, a7 -; RV64IM-NEXT: srli a7, a5, 40 -; RV64IM-NEXT: xor a5, a5, a6 -; RV64IM-NEXT: slli a3, a3, 32 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: and a4, a7, t0 -; RV64IM-NEXT: srli a5, a5, 56 -; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: or a4, a4, a5 -; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a4 -; RV64IM-NEXT: or a0, a0, a1 -; RV64IM-NEXT: srli a1, a0, 4 -; RV64IM-NEXT: ld a2, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 4 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 2 -; RV64IM-NEXT: ld a2, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 2 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: ld a2, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: ld ra, 440(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s0, 432(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s1, 424(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s2, 416(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s3, 408(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 400(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s5, 392(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s6, 384(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s7, 376(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s8, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s9, 360(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s10, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s11, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: addi sp, sp, 448 -; RV64IM-NEXT: ret - %res = call i64 @llvm.clmulr.i64(i64 %a, i64 %b) - ret i64 %res -} - -define i4 @clmulr_constfold_i4() nounwind { -; CHECK-LABEL: clmulr_constfold_i4: -; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: ret - %res = call i4 @llvm.clmulr.i4(i4 1, i4 2) - ret i4 %res -} - -define i16 @clmulr_constfold_i16() nounwind { -; CHECK-LABEL: clmulr_constfold_i16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 11 -; CHECK-NEXT: addi a0, a0, -1365 -; CHECK-NEXT: ret - %res = call i16 @llvm.clmulr.i16(i16 -2, i16 -1) - ret i16 %res -} diff --git a/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll index dd04be1212587..ff4f1646afd2d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll @@ -5547,18642 +5547,3 @@ define @clmul_nxv8i64( %x, @llvm.clmul.nxv8i64( %x, %y) ret %a } - -define @clmulr_nxv1i32( %x, %y) nounwind { -; CHECK-LABEL: clmulr_nxv1i32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 8 -; CHECK-NEXT: lui a4, 16 -; CHECK-NEXT: vsrl.vi v10, v8, 24 -; CHECK-NEXT: vsll.vi v11, v8, 24 -; CHECK-NEXT: lui a0, 61681 -; CHECK-NEXT: lui a1, 209715 -; CHECK-NEXT: lui a5, 349525 -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: addi a3, a4, -256 -; CHECK-NEXT: addi a2, a0, -241 -; CHECK-NEXT: addi a1, a1, 819 -; CHECK-NEXT: addi a0, a5, 1365 -; CHECK-NEXT: vand.vx v9, v9, a3 -; CHECK-NEXT: vand.vx v8, v8, a3 -; CHECK-NEXT: vor.vv v9, v9, v10 -; CHECK-NEXT: vsll.vi v8, v8, 8 -; CHECK-NEXT: vor.vv v8, v11, v8 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; 
CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vand.vx v9, v8, a6 -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: vand.vx v10, v8, a5 -; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vand.vx v11, v8, a5 -; CHECK-NEXT: li a5, 128 -; CHECK-NEXT: vand.vx v12, v8, a5 -; CHECK-NEXT: li a5, 256 -; CHECK-NEXT: vand.vx v13, v8, a5 -; CHECK-NEXT: li a5, 512 -; CHECK-NEXT: vand.vx v14, v8, a5 -; CHECK-NEXT: li a5, 1024 -; CHECK-NEXT: vand.vx v15, v8, a5 -; CHECK-NEXT: li a5, 1 -; CHECK-NEXT: slli a5, a5, 11 -; CHECK-NEXT: vand.vx v16, v8, a5 -; CHECK-NEXT: lui a5, 1 -; CHECK-NEXT: vand.vx v17, v8, a5 -; CHECK-NEXT: lui a5, 2 -; CHECK-NEXT: vand.vx v18, v8, a5 -; CHECK-NEXT: lui a5, 4 -; CHECK-NEXT: vand.vx v19, v8, a5 -; CHECK-NEXT: lui a5, 8 -; CHECK-NEXT: vand.vx v20, v8, a5 -; CHECK-NEXT: lui a5, 32 -; CHECK-NEXT: vand.vx v21, v8, a4 -; CHECK-NEXT: lui a4, 64 -; CHECK-NEXT: vand.vx v22, v8, a5 -; CHECK-NEXT: lui a5, 128 -; CHECK-NEXT: vand.vx v23, v8, a4 -; CHECK-NEXT: lui a4, 256 -; CHECK-NEXT: vand.vx v24, v8, a5 -; CHECK-NEXT: lui a5, 512 -; CHECK-NEXT: vand.vx v25, v8, a4 -; CHECK-NEXT: lui a4, 1024 -; CHECK-NEXT: vand.vx v26, v8, a5 -; CHECK-NEXT: lui a5, 2048 -; CHECK-NEXT: vand.vx v27, v8, a4 -; CHECK-NEXT: lui a4, 4096 -; CHECK-NEXT: vand.vx v28, v8, a5 -; CHECK-NEXT: lui a5, 8192 -; CHECK-NEXT: vand.vx v29, v8, a4 -; CHECK-NEXT: lui a4, 16384 -; CHECK-NEXT: vand.vx v30, v8, a5 -; CHECK-NEXT: lui a5, 32768 -; CHECK-NEXT: vand.vx v31, v8, a4 -; CHECK-NEXT: lui a4, 65536 -; CHECK-NEXT: vand.vx v7, v8, a5 -; CHECK-NEXT: lui a5, 131072 -; CHECK-NEXT: vand.vx v6, v8, a4 -; CHECK-NEXT: lui a4, 262144 -; CHECK-NEXT: vand.vx v5, v8, a5 -; CHECK-NEXT: lui a5, 524288 -; CHECK-NEXT: vand.vi v4, v8, 2 -; CHECK-NEXT: vand.vi v3, v8, 1 -; CHECK-NEXT: vand.vi v2, v8, 4 -; CHECK-NEXT: vand.vi v1, v8, 8 -; CHECK-NEXT: vand.vx v0, v8, a4 -; CHECK-NEXT: vmul.vv v4, v8, v4 -; CHECK-NEXT: vmul.vv v3, v8, v3 -; CHECK-NEXT: vmul.vv v2, v8, v2 -; CHECK-NEXT: vmul.vv v1, v8, v1 -; CHECK-NEXT: vmul.vv v9, v8, v9 -; CHECK-NEXT: vmul.vv v10, v8, v10 -; CHECK-NEXT: vmul.vv v11, v8, v11 -; CHECK-NEXT: vmul.vv v12, v8, v12 -; CHECK-NEXT: vmul.vv v13, v8, v13 -; CHECK-NEXT: vmul.vv v14, v8, v14 -; CHECK-NEXT: vmul.vv v15, v8, v15 -; CHECK-NEXT: vmul.vv v16, v8, v16 -; CHECK-NEXT: vmul.vv v17, v8, v17 -; CHECK-NEXT: vmul.vv v18, v8, v18 -; CHECK-NEXT: vmul.vv v19, v8, v19 -; CHECK-NEXT: vmul.vv v20, v8, v20 -; CHECK-NEXT: vmul.vv v21, v8, v21 -; CHECK-NEXT: vmul.vv v22, v8, v22 -; CHECK-NEXT: vmul.vv v23, v8, v23 -; CHECK-NEXT: vmul.vv v24, v8, v24 -; CHECK-NEXT: vmul.vv v25, v8, v25 -; CHECK-NEXT: vmul.vv v26, v8, v26 -; CHECK-NEXT: vmul.vv v27, v8, v27 -; CHECK-NEXT: vmul.vv v28, v8, v28 -; CHECK-NEXT: vmul.vv v29, v8, v29 -; CHECK-NEXT: vmul.vv v30, v8, v30 -; CHECK-NEXT: vmul.vv v31, v8, v31 -; CHECK-NEXT: vmul.vv v7, v8, v7 -; CHECK-NEXT: vmul.vv v6, v8, v6 -; CHECK-NEXT: vmul.vv v5, v8, v5 -; CHECK-NEXT: vmul.vv v0, v8, v0 -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a4) # vscale x 8-byte Folded Spill -; CHECK-NEXT: vand.vx v0, v8, a5 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vxor.vv v4, v3, v4 -; CHECK-NEXT: vxor.vv 
v4, v4, v2 -; CHECK-NEXT: vxor.vv v4, v4, v1 -; CHECK-NEXT: vxor.vv v9, v4, v9 -; CHECK-NEXT: vxor.vv v9, v9, v10 -; CHECK-NEXT: vxor.vv v9, v9, v11 -; CHECK-NEXT: vxor.vv v9, v9, v12 -; CHECK-NEXT: vxor.vv v10, v9, v13 -; CHECK-NEXT: vxor.vv v10, v10, v14 -; CHECK-NEXT: vxor.vv v10, v10, v15 -; CHECK-NEXT: vxor.vv v10, v10, v16 -; CHECK-NEXT: vxor.vv v10, v10, v17 -; CHECK-NEXT: vxor.vv v10, v10, v18 -; CHECK-NEXT: vxor.vv v10, v10, v19 -; CHECK-NEXT: vxor.vv v10, v10, v20 -; CHECK-NEXT: vxor.vv v10, v10, v21 -; CHECK-NEXT: vxor.vv v10, v10, v22 -; CHECK-NEXT: vxor.vv v10, v10, v23 -; CHECK-NEXT: vxor.vv v10, v10, v24 -; CHECK-NEXT: vxor.vv v10, v10, v25 -; CHECK-NEXT: vxor.vv v10, v10, v26 -; CHECK-NEXT: vxor.vv v10, v10, v27 -; CHECK-NEXT: vxor.vv v10, v10, v28 -; CHECK-NEXT: vsll.vi v9, v9, 24 -; CHECK-NEXT: vxor.vv v11, v10, v29 -; CHECK-NEXT: vxor.vv v11, v11, v30 -; CHECK-NEXT: vand.vx v12, v10, a3 -; CHECK-NEXT: vsll.vi v12, v12, 8 -; CHECK-NEXT: vor.vv v9, v9, v12 -; CHECK-NEXT: vxor.vv v11, v11, v31 -; CHECK-NEXT: vxor.vv v11, v11, v7 -; CHECK-NEXT: vxor.vv v11, v11, v6 -; CHECK-NEXT: vxor.vv v11, v11, v5 -; CHECK-NEXT: vsrl.vi v10, v10, 8 -; CHECK-NEXT: vand.vx v10, v10, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl1r.v v12, (a3) # vscale x 8-byte Folded Reload -; CHECK-NEXT: vxor.vv v11, v11, v12 -; CHECK-NEXT: vxor.vv v8, v11, v8 -; CHECK-NEXT: vsrl.vi v8, v8, 24 -; CHECK-NEXT: vor.vv v8, v10, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret - %a = call @llvm.clmulr.nxv1i32( %x, %y) - ret %a -} - -define @clmulr_nxv2i32( %x, %y) nounwind { -; CHECK-LABEL: clmulr_nxv2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 8 -; CHECK-NEXT: lui a4, 16 -; CHECK-NEXT: vsrl.vi v10, v8, 24 -; CHECK-NEXT: vsll.vi v11, v8, 24 -; CHECK-NEXT: lui a0, 61681 -; CHECK-NEXT: lui a1, 209715 -; CHECK-NEXT: lui a5, 349525 -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: addi a3, a4, -256 -; CHECK-NEXT: addi a2, a0, -241 -; CHECK-NEXT: addi a1, a1, 819 -; CHECK-NEXT: addi a0, a5, 1365 -; CHECK-NEXT: vand.vx v9, v9, a3 -; CHECK-NEXT: vand.vx v8, v8, a3 -; CHECK-NEXT: vor.vv v9, v9, v10 -; CHECK-NEXT: vsll.vi v8, v8, 8 -; CHECK-NEXT: vor.vv v8, v11, v8 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vand.vx v9, v8, a6 -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: vand.vx v10, v8, a5 -; 
CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vand.vx v11, v8, a5 -; CHECK-NEXT: li a5, 128 -; CHECK-NEXT: vand.vx v12, v8, a5 -; CHECK-NEXT: li a5, 256 -; CHECK-NEXT: vand.vx v13, v8, a5 -; CHECK-NEXT: li a5, 512 -; CHECK-NEXT: vand.vx v14, v8, a5 -; CHECK-NEXT: li a5, 1024 -; CHECK-NEXT: vand.vx v15, v8, a5 -; CHECK-NEXT: li a5, 1 -; CHECK-NEXT: slli a5, a5, 11 -; CHECK-NEXT: vand.vx v16, v8, a5 -; CHECK-NEXT: lui a5, 1 -; CHECK-NEXT: vand.vx v17, v8, a5 -; CHECK-NEXT: lui a5, 2 -; CHECK-NEXT: vand.vx v18, v8, a5 -; CHECK-NEXT: lui a5, 4 -; CHECK-NEXT: vand.vx v19, v8, a5 -; CHECK-NEXT: lui a5, 8 -; CHECK-NEXT: vand.vx v20, v8, a5 -; CHECK-NEXT: lui a5, 32 -; CHECK-NEXT: vand.vx v21, v8, a4 -; CHECK-NEXT: lui a4, 64 -; CHECK-NEXT: vand.vx v22, v8, a5 -; CHECK-NEXT: lui a5, 128 -; CHECK-NEXT: vand.vx v23, v8, a4 -; CHECK-NEXT: lui a4, 256 -; CHECK-NEXT: vand.vx v24, v8, a5 -; CHECK-NEXT: lui a5, 512 -; CHECK-NEXT: vand.vx v25, v8, a4 -; CHECK-NEXT: lui a4, 1024 -; CHECK-NEXT: vand.vx v26, v8, a5 -; CHECK-NEXT: lui a5, 2048 -; CHECK-NEXT: vand.vx v27, v8, a4 -; CHECK-NEXT: lui a4, 4096 -; CHECK-NEXT: vand.vx v28, v8, a5 -; CHECK-NEXT: lui a5, 8192 -; CHECK-NEXT: vand.vx v29, v8, a4 -; CHECK-NEXT: lui a4, 16384 -; CHECK-NEXT: vand.vx v30, v8, a5 -; CHECK-NEXT: lui a5, 32768 -; CHECK-NEXT: vand.vx v31, v8, a4 -; CHECK-NEXT: lui a4, 65536 -; CHECK-NEXT: vand.vx v7, v8, a5 -; CHECK-NEXT: lui a5, 131072 -; CHECK-NEXT: vand.vx v6, v8, a4 -; CHECK-NEXT: lui a4, 262144 -; CHECK-NEXT: vand.vx v5, v8, a5 -; CHECK-NEXT: lui a5, 524288 -; CHECK-NEXT: vand.vi v4, v8, 2 -; CHECK-NEXT: vand.vi v3, v8, 1 -; CHECK-NEXT: vand.vi v2, v8, 4 -; CHECK-NEXT: vand.vi v1, v8, 8 -; CHECK-NEXT: vand.vx v0, v8, a4 -; CHECK-NEXT: vmul.vv v4, v8, v4 -; CHECK-NEXT: vmul.vv v3, v8, v3 -; CHECK-NEXT: vmul.vv v2, v8, v2 -; CHECK-NEXT: vmul.vv v1, v8, v1 -; CHECK-NEXT: vmul.vv v9, v8, v9 -; CHECK-NEXT: vmul.vv v10, v8, v10 -; CHECK-NEXT: vmul.vv v11, v8, v11 -; CHECK-NEXT: vmul.vv v12, v8, v12 -; CHECK-NEXT: vmul.vv v13, v8, v13 -; CHECK-NEXT: vmul.vv v14, v8, v14 -; CHECK-NEXT: vmul.vv v15, v8, v15 -; CHECK-NEXT: vmul.vv v16, v8, v16 -; CHECK-NEXT: vmul.vv v17, v8, v17 -; CHECK-NEXT: vmul.vv v18, v8, v18 -; CHECK-NEXT: vmul.vv v19, v8, v19 -; CHECK-NEXT: vmul.vv v20, v8, v20 -; CHECK-NEXT: vmul.vv v21, v8, v21 -; CHECK-NEXT: vmul.vv v22, v8, v22 -; CHECK-NEXT: vmul.vv v23, v8, v23 -; CHECK-NEXT: vmul.vv v24, v8, v24 -; CHECK-NEXT: vmul.vv v25, v8, v25 -; CHECK-NEXT: vmul.vv v26, v8, v26 -; CHECK-NEXT: vmul.vv v27, v8, v27 -; CHECK-NEXT: vmul.vv v28, v8, v28 -; CHECK-NEXT: vmul.vv v29, v8, v29 -; CHECK-NEXT: vmul.vv v30, v8, v30 -; CHECK-NEXT: vmul.vv v31, v8, v31 -; CHECK-NEXT: vmul.vv v7, v8, v7 -; CHECK-NEXT: vmul.vv v6, v8, v6 -; CHECK-NEXT: vmul.vv v5, v8, v5 -; CHECK-NEXT: vmul.vv v0, v8, v0 -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a4) # vscale x 8-byte Folded Spill -; CHECK-NEXT: vand.vx v0, v8, a5 -; CHECK-NEXT: vmul.vv v8, v8, v0 -; CHECK-NEXT: vxor.vv v4, v3, v4 -; CHECK-NEXT: vxor.vv v4, v4, v2 -; CHECK-NEXT: vxor.vv v4, v4, v1 -; CHECK-NEXT: vxor.vv v9, v4, v9 -; CHECK-NEXT: vxor.vv v9, v9, v10 -; CHECK-NEXT: vxor.vv v9, v9, v11 -; CHECK-NEXT: vxor.vv v9, v9, v12 -; CHECK-NEXT: vxor.vv v10, v9, v13 -; CHECK-NEXT: vxor.vv v10, v10, v14 -; CHECK-NEXT: vxor.vv v10, v10, v15 -; CHECK-NEXT: vxor.vv v10, v10, v16 -; CHECK-NEXT: vxor.vv v10, v10, v17 -; CHECK-NEXT: vxor.vv v10, v10, v18 -; CHECK-NEXT: vxor.vv v10, v10, v19 -; CHECK-NEXT: vxor.vv v10, v10, v20 -; CHECK-NEXT: vxor.vv v10, v10, v21 -; CHECK-NEXT: 
vxor.vv v10, v10, v22 -; CHECK-NEXT: vxor.vv v10, v10, v23 -; CHECK-NEXT: vxor.vv v10, v10, v24 -; CHECK-NEXT: vxor.vv v10, v10, v25 -; CHECK-NEXT: vxor.vv v10, v10, v26 -; CHECK-NEXT: vxor.vv v10, v10, v27 -; CHECK-NEXT: vxor.vv v10, v10, v28 -; CHECK-NEXT: vsll.vi v9, v9, 24 -; CHECK-NEXT: vxor.vv v11, v10, v29 -; CHECK-NEXT: vxor.vv v11, v11, v30 -; CHECK-NEXT: vand.vx v12, v10, a3 -; CHECK-NEXT: vsll.vi v12, v12, 8 -; CHECK-NEXT: vor.vv v9, v9, v12 -; CHECK-NEXT: vxor.vv v11, v11, v31 -; CHECK-NEXT: vxor.vv v11, v11, v7 -; CHECK-NEXT: vxor.vv v11, v11, v6 -; CHECK-NEXT: vxor.vv v11, v11, v5 -; CHECK-NEXT: vsrl.vi v10, v10, 8 -; CHECK-NEXT: vand.vx v10, v10, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl1r.v v12, (a3) # vscale x 8-byte Folded Reload -; CHECK-NEXT: vxor.vv v11, v11, v12 -; CHECK-NEXT: vxor.vv v8, v11, v8 -; CHECK-NEXT: vsrl.vi v8, v8, 24 -; CHECK-NEXT: vor.vv v8, v10, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret - %a = call @llvm.clmulr.nxv2i32( %x, %y) - ret %a -} - -define @clmulr_nxv4i32( %x, %y) nounwind { -; RV32-LABEL: clmulr_nxv4i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: sw s0, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 52(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 48(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vsll.vi v14, v8, 24 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: lui s6, 349525 -; RV32-NEXT: li t2, 16 -; RV32-NEXT: li t5, 32 -; RV32-NEXT: li s2, 64 -; RV32-NEXT: li s5, 128 -; RV32-NEXT: li s4, 256 -; RV32-NEXT: li s3, 512 -; RV32-NEXT: li s1, 1024 -; RV32-NEXT: li s0, 1 -; RV32-NEXT: lui t6, 1 -; RV32-NEXT: lui t4, 2 -; RV32-NEXT: lui t3, 4 -; RV32-NEXT: lui a5, 8 -; RV32-NEXT: lui a6, 32 -; RV32-NEXT: lui a7, 64 -; RV32-NEXT: lui t0, 128 -; RV32-NEXT: lui t1, 256 -; RV32-NEXT: addi a4, a0, -256 -; RV32-NEXT: addi a3, a1, -241 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: addi a1, s6, 1365 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v10, 
v10, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v10, v10, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vand.vx v10, v8, t2 -; RV32-NEXT: lui t2, 512 -; RV32-NEXT: vand.vx v12, v8, t5 -; RV32-NEXT: lui t5, 1024 -; RV32-NEXT: vand.vx v14, v8, s2 -; RV32-NEXT: lui s2, 2048 -; RV32-NEXT: vand.vx v16, v8, s5 -; RV32-NEXT: lui s5, 4096 -; RV32-NEXT: vand.vx v26, v8, s4 -; RV32-NEXT: lui s4, 8192 -; RV32-NEXT: vand.vx v28, v8, s3 -; RV32-NEXT: lui s3, 16384 -; RV32-NEXT: vand.vx v18, v8, s1 -; RV32-NEXT: lui s1, 32768 -; RV32-NEXT: slli s0, s0, 11 -; RV32-NEXT: vand.vx v20, v8, s0 -; RV32-NEXT: lui s0, 65536 -; RV32-NEXT: vand.vx v22, v8, t6 -; RV32-NEXT: lui t6, 131072 -; RV32-NEXT: vand.vx v24, v8, t4 -; RV32-NEXT: lui t4, 262144 -; RV32-NEXT: vand.vx v30, v8, t3 -; RV32-NEXT: lui t3, 524288 -; RV32-NEXT: vand.vi v6, v8, 2 -; RV32-NEXT: vand.vi v4, v8, 1 -; RV32-NEXT: vand.vi v2, v8, 4 -; RV32-NEXT: vand.vi v0, v8, 8 -; RV32-NEXT: vmul.vv v6, v8, v6 -; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v6, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v6, v8, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v6, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 2 -; 
RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v26 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v18 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v22 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v24 -; RV32-NEXT: csrr s6, vlenb -; RV32-NEXT: slli s6, s6, 1 -; RV32-NEXT: mv a0, s6 -; RV32-NEXT: slli s6, s6, 2 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add s6, sp, s6 -; RV32-NEXT: addi s6, s6, 32 -; RV32-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v30 -; RV32-NEXT: csrr s6, vlenb -; RV32-NEXT: slli s6, s6, 3 -; RV32-NEXT: add s6, sp, s6 -; RV32-NEXT: addi s6, s6, 32 -; RV32-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: mv s6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a5, a5, s6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 32 -; RV32-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a6 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a7 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t0 -; RV32-NEXT: vmul.vv v6, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t1 -; RV32-NEXT: vmul.vv v30, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t2 -; RV32-NEXT: vmul.vv v28, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t5 -; RV32-NEXT: vmul.vv v26, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s2 -; RV32-NEXT: vmul.vv v22, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s5 -; RV32-NEXT: 
vmul.vv v18, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s4 -; RV32-NEXT: vmul.vv v16, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s3 -; RV32-NEXT: vmul.vv v24, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s1 -; RV32-NEXT: vmul.vv v20, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s0 -; RV32-NEXT: vmul.vv v12, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t6 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: vand.vx v14, v8, t4 -; RV32-NEXT: vmul.vv v14, v8, v14 -; RV32-NEXT: vand.vx v0, v8, t3 -; RV32-NEXT: vmul.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v0, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, 
(a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v4, v2, v4 -; RV32-NEXT: vxor.vv v6, v4, v6 -; RV32-NEXT: vxor.vv v30, v6, v30 -; RV32-NEXT: vxor.vv v28, v30, v28 -; RV32-NEXT: vxor.vv v26, v28, v26 -; RV32-NEXT: vxor.vv v22, v26, v22 -; RV32-NEXT: vsll.vi v26, v0, 24 -; RV32-NEXT: vxor.vv v18, v22, v18 -; RV32-NEXT: vxor.vv v16, v18, v16 -; RV32-NEXT: vand.vx v18, v22, a4 -; RV32-NEXT: vsll.vi v18, v18, 8 -; RV32-NEXT: vor.vv v18, v26, v18 -; RV32-NEXT: vxor.vv v16, v16, v24 -; RV32-NEXT: vxor.vv v16, v16, v20 -; RV32-NEXT: vxor.vv v12, v16, v12 -; RV32-NEXT: vxor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v22, 8 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vxor.vv v10, v10, v14 -; RV32-NEXT: vxor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v18, v8 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v10, v10, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v10, v10, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 
-; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw s0, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 52(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 48(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_nxv4i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vsll.vi v14, v8, 24 -; RV64-NEXT: lui a1, 61681 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui s6, 349525 -; RV64-NEXT: li t2, 16 -; RV64-NEXT: li t5, 32 -; RV64-NEXT: li s2, 64 -; RV64-NEXT: li s5, 128 -; RV64-NEXT: li s4, 256 -; RV64-NEXT: li s3, 512 -; RV64-NEXT: li s1, 1024 -; RV64-NEXT: li s0, 1 -; RV64-NEXT: lui t6, 1 -; RV64-NEXT: lui t4, 2 -; RV64-NEXT: lui t3, 4 -; RV64-NEXT: lui a5, 8 -; RV64-NEXT: lui a6, 32 -; RV64-NEXT: lui a7, 64 -; RV64-NEXT: lui t0, 128 -; RV64-NEXT: lui t1, 256 -; RV64-NEXT: addi a4, a0, -256 -; RV64-NEXT: addi a3, a1, -241 -; RV64-NEXT: addi a2, a2, 819 -; RV64-NEXT: addi a1, s6, 1365 -; RV64-NEXT: vand.vx v10, v10, a4 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v10, v10, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v10, v10, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v10, v10, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vand.vx v10, v8, t2 -; RV64-NEXT: lui t2, 512 -; RV64-NEXT: vand.vx v12, v8, t5 -; RV64-NEXT: lui t5, 1024 -; RV64-NEXT: vand.vx v14, v8, s2 -; RV64-NEXT: lui s2, 2048 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: vand.vx v26, v8, s4 -; RV64-NEXT: lui s4, 8192 -; RV64-NEXT: vand.vx v28, v8, s3 -; RV64-NEXT: lui s3, 16384 -; RV64-NEXT: vand.vx v18, v8, s1 -; RV64-NEXT: lui s1, 32768 -; RV64-NEXT: slli s0, s0, 11 -; RV64-NEXT: vand.vx v20, v8, s0 -; RV64-NEXT: lui s0, 65536 -; RV64-NEXT: vand.vx v22, v8, t6 -; RV64-NEXT: lui t6, 131072 -; RV64-NEXT: vand.vx v24, v8, t4 -; RV64-NEXT: lui t4, 262144 -; RV64-NEXT: vand.vx v30, v8, t3 -; RV64-NEXT: lui t3, 524288 -; RV64-NEXT: vand.vi v6, v8, 2 -; RV64-NEXT: vand.vi v4, v8, 1 -; 
RV64-NEXT: vand.vi v2, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v6, v8, v6 -; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v14 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v26 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v18 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v20 -; RV64-NEXT: csrr a0, vlenb 
-; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v22 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v24 -; RV64-NEXT: csrr s6, vlenb -; RV64-NEXT: slli s6, s6, 1 -; RV64-NEXT: mv a0, s6 -; RV64-NEXT: slli s6, s6, 2 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s6, sp, s6 -; RV64-NEXT: addi s6, s6, 32 -; RV64-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v30 -; RV64-NEXT: csrr s6, vlenb -; RV64-NEXT: slli s6, s6, 3 -; RV64-NEXT: add s6, sp, s6 -; RV64-NEXT: addi s6, s6, 32 -; RV64-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, a5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, a6 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, a7 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, t0 -; RV64-NEXT: vmul.vv v6, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t1 -; RV64-NEXT: vmul.vv v30, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t2 -; RV64-NEXT: vmul.vv v28, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t5 -; RV64-NEXT: vmul.vv v26, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s2 -; RV64-NEXT: vmul.vv v22, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v18, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s4 -; RV64-NEXT: vmul.vv v16, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s3 -; RV64-NEXT: vmul.vv v24, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s1 -; RV64-NEXT: vmul.vv v20, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s0 -; RV64-NEXT: vmul.vv v12, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t6 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: vand.vx v14, v8, t4 -; RV64-NEXT: vmul.vv v14, v8, v14 -; RV64-NEXT: vand.vx v0, v8, t3 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded 
Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; 
RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v4, v2, v4 -; RV64-NEXT: vxor.vv v6, v4, v6 -; RV64-NEXT: vxor.vv v30, v6, v30 -; RV64-NEXT: vxor.vv v28, v30, v28 -; RV64-NEXT: vxor.vv v26, v28, v26 -; RV64-NEXT: vxor.vv v22, v26, v22 -; RV64-NEXT: vsll.vi v26, v0, 24 -; RV64-NEXT: vxor.vv v18, v22, v18 -; RV64-NEXT: vxor.vv v16, v18, v16 -; RV64-NEXT: vand.vx v18, v22, a4 -; RV64-NEXT: vsll.vi v18, v18, 8 -; RV64-NEXT: vor.vv v18, v26, v18 -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: vxor.vv v16, v16, v20 -; RV64-NEXT: vxor.vv v12, v16, v12 -; RV64-NEXT: vxor.vv v10, v12, v10 -; RV64-NEXT: vsrl.vi v12, v22, 8 -; RV64-NEXT: vand.vx v12, v12, a4 -; RV64-NEXT: vxor.vv v10, v10, v14 -; RV64-NEXT: vxor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vor.vv v8, v18, v8 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v10, v10, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v10, v10, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v10, v10, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 -; RV64-NEXT: ret - %a = call @llvm.clmulr.nxv4i32( %x, %y) - ret %a -} - -define @clmulr_nxv8i32( %x, %y) nounwind { -; RV32-LABEL: clmulr_nxv8i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -80 -; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 68(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 64(sp) # 4-byte Folded Spill -; 
RV32-NEXT: sw s3, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 52(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 48(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: lui a5, 16 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vsll.vi v20, v8, 24 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: lui ra, 349525 -; RV32-NEXT: li s9, 16 -; RV32-NEXT: li s8, 32 -; RV32-NEXT: li s6, 64 -; RV32-NEXT: li a7, 512 -; RV32-NEXT: li t0, 1024 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: lui t1, 1 -; RV32-NEXT: lui t2, 2 -; RV32-NEXT: lui t3, 4 -; RV32-NEXT: lui t4, 8 -; RV32-NEXT: lui t5, 32 -; RV32-NEXT: lui t6, 64 -; RV32-NEXT: lui s0, 128 -; RV32-NEXT: lui s1, 256 -; RV32-NEXT: lui s2, 512 -; RV32-NEXT: lui s3, 1024 -; RV32-NEXT: lui s4, 2048 -; RV32-NEXT: lui s5, 4096 -; RV32-NEXT: lui s7, 8192 -; RV32-NEXT: lui s10, 16384 -; RV32-NEXT: lui s11, 32768 -; RV32-NEXT: addi a4, a5, -256 -; RV32-NEXT: addi a3, a1, -241 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: addi a1, ra, 1365 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vand.vx v12, v8, s9 -; RV32-NEXT: lui s9, 65536 -; RV32-NEXT: vand.vx v16, v8, s8 -; RV32-NEXT: lui s8, 131072 -; RV32-NEXT: vand.vx v20, v8, s6 -; RV32-NEXT: lui s6, 262144 -; RV32-NEXT: slli ra, a0, 11 -; RV32-NEXT: vand.vi v24, v8, 2 -; RV32-NEXT: vand.vi v28, v8, 1 -; RV32-NEXT: vand.vi v4, v8, 4 -; RV32-NEXT: vand.vi v0, v8, 8 -; RV32-NEXT: vmul.vv v24, v8, v24 -; RV32-NEXT: sw a4, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v24, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v24, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 
5 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v24, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: li a6, 128 -; RV32-NEXT: vand.vx v12, v8, a6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 2 -; RV32-NEXT: mv a6, a4 -; RV32-NEXT: slli a4, a4, 1 -; RV32-NEXT: add a6, a6, a4 -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, a4, a6 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a6, 256 -; RV32-NEXT: vand.vx v12, v8, a6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: mv a4, a6 -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: add a6, a6, a4 -; RV32-NEXT: lw a4, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, a7 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 6 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, ra -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t1 -; RV32-NEXT: 
vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, a5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a6, a6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: 
add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s7 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: vand.vx v16, v8, s10 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v16, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s11 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v16, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s9 -; RV32-NEXT: vmul.vv v28, v8, v16 -; RV32-NEXT: vand.vx v16, v8, s8 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: vand.vx v20, v8, s6 -; RV32-NEXT: vmul.vv v4, v8, v20 -; RV32-NEXT: vand.vx v20, v8, a0 -; RV32-NEXT: vmul.vv v20, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, 
vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli 
a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v24, v0, v24 -; RV32-NEXT: vxor.vv v12, v24, v12 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vand.vx v24, v0, a4 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v12, v12, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v12, v12, v24 -; RV32-NEXT: vxor.vv v12, v12, v28 -; RV32-NEXT: vxor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vi v16, v0, 8 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vxor.vv v12, v12, v4 -; RV32-NEXT: vxor.vv v12, v12, v20 -; RV32-NEXT: vsrl.vi v12, v12, 24 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 
-; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 68(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 64(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 52(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 48(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 80 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_nxv8i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -144 -; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: lui a5, 16 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vsll.vi v20, v8, 24 -; RV64-NEXT: lui a1, 61681 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui ra, 349525 -; RV64-NEXT: li s9, 16 -; RV64-NEXT: li s8, 32 -; RV64-NEXT: li s6, 64 -; RV64-NEXT: li a7, 512 -; RV64-NEXT: li t0, 1024 -; RV64-NEXT: li a0, 1 -; RV64-NEXT: lui t1, 1 -; RV64-NEXT: lui t2, 2 -; RV64-NEXT: lui t3, 4 -; RV64-NEXT: lui t4, 8 -; RV64-NEXT: lui t5, 32 -; RV64-NEXT: lui t6, 64 -; RV64-NEXT: lui s0, 128 -; RV64-NEXT: lui s1, 256 -; RV64-NEXT: lui s2, 512 -; RV64-NEXT: lui s3, 1024 -; RV64-NEXT: lui s4, 2048 -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: lui s7, 8192 -; RV64-NEXT: lui s10, 16384 -; RV64-NEXT: lui s11, 32768 -; RV64-NEXT: addi a4, a5, -256 -; RV64-NEXT: addi a3, a1, -241 -; RV64-NEXT: addi a2, a2, 819 -; RV64-NEXT: addi a1, ra, 1365 -; RV64-NEXT: vand.vx v12, v12, a4 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v20, v8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v12, v12, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v12, v12, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v12, v12, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv 
v8, v12, v8 -; RV64-NEXT: vand.vx v12, v8, s9 -; RV64-NEXT: lui s9, 65536 -; RV64-NEXT: vand.vx v16, v8, s8 -; RV64-NEXT: lui s8, 131072 -; RV64-NEXT: vand.vx v20, v8, s6 -; RV64-NEXT: lui s6, 262144 -; RV64-NEXT: slli ra, a0, 11 -; RV64-NEXT: vand.vi v24, v8, 2 -; RV64-NEXT: vand.vi v28, v8, 1 -; RV64-NEXT: vand.vi v4, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v24, v8, v24 -; RV64-NEXT: sd a4, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v20 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: li a6, 128 -; RV64-NEXT: vand.vx v12, v8, a6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 2 -; RV64-NEXT: mv a6, a4 -; RV64-NEXT: slli a4, a4, 1 -; RV64-NEXT: add a6, a6, a4 -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, a4, a6 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill -; RV64-NEXT: li a6, 256 -; RV64-NEXT: vand.vx v12, v8, a6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 3 -; RV64-NEXT: mv a4, a6 -; RV64-NEXT: slli a6, a6, 3 -; RV64-NEXT: add a6, a6, a4 -; RV64-NEXT: ld a4, 8(sp) # 8-byte Folded 
Reload -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a7 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 4 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t0 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 6 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, ra -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t1 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 3 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 4 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t4 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte 
Folded Spill -; RV64-NEXT: vand.vx v12, v8, s0 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a6, a6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s1 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s4 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: addi a5, sp, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s7 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: vand.vx v16, v8, s10 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v16, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s11 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v16, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s9 -; RV64-NEXT: vmul.vv v28, v8, v16 -; RV64-NEXT: vand.vx v16, v8, s8 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: vand.vx v20, v8, s6 -; RV64-NEXT: vmul.vv v4, v8, v20 -; RV64-NEXT: vand.vx v20, v8, a0 -; RV64-NEXT: vmul.vv v20, v8, v20 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v0, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # 
vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; 
RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v0, v24 -; RV64-NEXT: vxor.vv v12, v24, v12 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vand.vx v24, v0, a4 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; 
RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: vxor.vv v12, v12, v28 -; RV64-NEXT: vxor.vv v12, v12, v16 -; RV64-NEXT: vsrl.vi v16, v0, 8 -; RV64-NEXT: vand.vx v16, v16, a4 -; RV64-NEXT: vxor.vv v12, v12, v4 -; RV64-NEXT: vxor.vv v12, v12, v20 -; RV64-NEXT: vsrl.vi v12, v12, 24 -; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v12, v12, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v12, v12, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v12, v12, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 144 -; RV64-NEXT: ret - %a = call @llvm.clmulr.nxv8i32( %x, %x) - ret %a -} - -define @clmulr_nxv16i32( %x, %y) nounwind { -; RV32-LABEL: clmulr_nxv16i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -80 -; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 68(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 64(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 52(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 48(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: lui a5, 16 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vsll.vi v0, v8, 24 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: lui ra, 349525 -; RV32-NEXT: li t5, 16 -; RV32-NEXT: li t2, 
32 -; RV32-NEXT: li a7, 64 -; RV32-NEXT: li t0, 512 -; RV32-NEXT: li t1, 1024 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: lui t3, 1 -; RV32-NEXT: lui t4, 2 -; RV32-NEXT: lui t6, 4 -; RV32-NEXT: lui s0, 8 -; RV32-NEXT: lui s1, 32 -; RV32-NEXT: lui s2, 64 -; RV32-NEXT: lui s3, 128 -; RV32-NEXT: lui s4, 256 -; RV32-NEXT: lui s5, 512 -; RV32-NEXT: lui s6, 1024 -; RV32-NEXT: lui s7, 2048 -; RV32-NEXT: lui s8, 4096 -; RV32-NEXT: lui s9, 8192 -; RV32-NEXT: lui s10, 16384 -; RV32-NEXT: lui s11, 32768 -; RV32-NEXT: addi a4, a5, -256 -; RV32-NEXT: addi a3, a1, -241 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: addi a1, ra, 1365 -; RV32-NEXT: slli a0, a0, 11 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v0, v8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vand.vi v16, v8, 2 -; RV32-NEXT: vand.vi v24, v8, 1 -; RV32-NEXT: vand.vi v0, v8, 4 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vmul.vv v24, v8, v24 -; RV32-NEXT: vmul.vv v0, v8, v0 -; RV32-NEXT: vand.vi v16, v8, 8 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: lui ra, 65536 -; RV32-NEXT: vand.vx v16, v8, t5 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: lui t5, 131072 -; RV32-NEXT: vand.vx v16, v8, t2 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: lui t2, 262144 -; RV32-NEXT: vand.vx v16, v8, a7 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a6 -; 
RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: li a6, 128 -; RV32-NEXT: vand.vx v16, v8, a6 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: li a6, 256 -; RV32-NEXT: vand.vx v16, v8, a6 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: mv a0, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: mv t0, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add t0, t0, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add a6, a6, t0 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t1 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: mv t0, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add t0, t0, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add a6, a6, t0 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t3 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a6, a6, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t4 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t6 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a6, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, a5 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli 
a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s1 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s2 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s3 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s4 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s5 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s6 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s7 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s8 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s9 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: 
vand.vx v16, v8, s10 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s11 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, ra -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t5 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t2 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, a7 -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v24, v8 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: 
add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v16, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 
-; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v24 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: vand.vx v24, v8, a4 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v24, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v24, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v24, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: 
vxor.vv v24, v24, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v24, v24, v0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v24, v24, v0 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsrl.vi v24, v24, 24 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 68(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 64(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 52(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 48(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 80 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_nxv16i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -144 -; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: lui a5, 16 -; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: vsll.vi v0, v8, 24 -; RV64-NEXT: lui a1, 61681 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui ra, 349525 -; RV64-NEXT: li t5, 16 -; RV64-NEXT: li t2, 32 -; RV64-NEXT: li a7, 64 -; RV64-NEXT: li t0, 512 -; RV64-NEXT: li t1, 1024 -; RV64-NEXT: li a0, 1 -; RV64-NEXT: lui t3, 1 -; RV64-NEXT: lui t4, 2 -; RV64-NEXT: lui t6, 4 -; RV64-NEXT: lui s0, 8 
-; RV64-NEXT: lui s1, 32 -; RV64-NEXT: lui s2, 64 -; RV64-NEXT: lui s3, 128 -; RV64-NEXT: lui s4, 256 -; RV64-NEXT: lui s5, 512 -; RV64-NEXT: lui s6, 1024 -; RV64-NEXT: lui s7, 2048 -; RV64-NEXT: lui s8, 4096 -; RV64-NEXT: lui s9, 8192 -; RV64-NEXT: lui s10, 16384 -; RV64-NEXT: lui s11, 32768 -; RV64-NEXT: addi a4, a5, -256 -; RV64-NEXT: addi a3, a1, -241 -; RV64-NEXT: addi a2, a2, 819 -; RV64-NEXT: addi a1, ra, 1365 -; RV64-NEXT: slli a0, a0, 11 -; RV64-NEXT: vand.vx v16, v16, a4 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v0, v8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v16, v16, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v16, v16, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v16, v16, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vand.vi v16, v8, 2 -; RV64-NEXT: vand.vi v24, v8, 1 -; RV64-NEXT: vand.vi v0, v8, 4 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v24 -; RV64-NEXT: vmul.vv v0, v8, v0 -; RV64-NEXT: vand.vi v16, v8, 8 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui ra, 65536 -; RV64-NEXT: vand.vx v16, v8, t5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui t5, 131072 -; RV64-NEXT: vand.vx v16, v8, t2 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui t2, 262144 -; RV64-NEXT: vand.vx v16, v8, a7 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui a7, 524288 -; RV64-NEXT: li a6, 128 -; RV64-NEXT: vand.vx 
v16, v8, a6 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: li a6, 256 -; RV64-NEXT: vand.vx v16, v8, a6 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 3 -; RV64-NEXT: mv a0, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t0 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 4 -; RV64-NEXT: mv t0, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add t0, t0, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a6, a6, t0 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t1 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 3 -; RV64-NEXT: mv t0, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add t0, t0, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a6, a6, t0 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs8r.v v16, (a6) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t3 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a6, a6, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t4 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t6 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a6, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s0 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 7 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, a5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add 
a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s1 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s2 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s3 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s4 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s6 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s7 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s8 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s9 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s10 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v 
v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s11 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, ra -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t2 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, a7 -; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v24, v8 -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, 
a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v16, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 7 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 
32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: vand.vx v24, v8, a4 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded 
Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: vsrl.vi v8, v8, 8 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vsrl.vi v24, v24, 24 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v16, v16, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v16, v16, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v16, v16, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 144 -; RV64-NEXT: ret - %a = call @llvm.clmulr.nxv16i32( %x, %y) - ret %a -} - -define @clmulr_nxv1i64( %x, %y) nounwind { -; RV32-LABEL: clmulr_nxv1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -352 -; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: lui s7, 1044480 -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: li s11, 1 -; RV32-NEXT: li s8, 2 -; RV32-NEXT: li s9, 4 -; RV32-NEXT: li s10, 8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: li a4, 32 -; RV32-NEXT: li a5, 64 -; RV32-NEXT: li a6, 128 -; RV32-NEXT: li ra, 256 -; RV32-NEXT: li a0, 512 -; RV32-NEXT: li a1, 1024 -; RV32-NEXT: lui a2, 1 -; RV32-NEXT: lui t0, 2 -; RV32-NEXT: lui t1, 4 -; RV32-NEXT: lui t2, 8 -; RV32-NEXT: lui t3, 16 -; RV32-NEXT: lui t4, 32 -; RV32-NEXT: lui t5, 64 -; RV32-NEXT: lui t6, 128 -; RV32-NEXT: lui s0, 256 -; RV32-NEXT: lui s1, 512 -; RV32-NEXT: lui s2, 1024 -; 
RV32-NEXT: lui s3, 2048 -; RV32-NEXT: lui s4, 4096 -; RV32-NEXT: lui s5, 8192 -; RV32-NEXT: lui s6, 16384 -; RV32-NEXT: sw s7, 272(sp) -; RV32-NEXT: lui s7, 32768 -; RV32-NEXT: sw zero, 276(sp) -; RV32-NEXT: sw a7, 264(sp) -; RV32-NEXT: sw zero, 268(sp) -; RV32-NEXT: sw zero, 256(sp) -; RV32-NEXT: sw s11, 260(sp) -; RV32-NEXT: sw zero, 248(sp) -; RV32-NEXT: sw s8, 252(sp) -; RV32-NEXT: lui s8, 65536 -; RV32-NEXT: sw zero, 240(sp) -; RV32-NEXT: sw s9, 244(sp) -; RV32-NEXT: lui s9, 131072 -; RV32-NEXT: sw zero, 232(sp) -; RV32-NEXT: sw s10, 236(sp) -; RV32-NEXT: lui s10, 262144 -; RV32-NEXT: sw zero, 224(sp) -; RV32-NEXT: sw a3, 228(sp) -; RV32-NEXT: sw zero, 216(sp) -; RV32-NEXT: sw a4, 220(sp) -; RV32-NEXT: sw zero, 208(sp) -; RV32-NEXT: sw a5, 212(sp) -; RV32-NEXT: sw zero, 200(sp) -; RV32-NEXT: sw a6, 204(sp) -; RV32-NEXT: sw zero, 192(sp) -; RV32-NEXT: sw ra, 196(sp) -; RV32-NEXT: sw zero, 184(sp) -; RV32-NEXT: sw a0, 188(sp) -; RV32-NEXT: sw zero, 176(sp) -; RV32-NEXT: sw a1, 180(sp) -; RV32-NEXT: slli s11, s11, 11 -; RV32-NEXT: sw zero, 168(sp) -; RV32-NEXT: sw s11, 172(sp) -; RV32-NEXT: sw zero, 160(sp) -; RV32-NEXT: sw a2, 164(sp) -; RV32-NEXT: sw zero, 152(sp) -; RV32-NEXT: sw t0, 156(sp) -; RV32-NEXT: sw zero, 144(sp) -; RV32-NEXT: sw t1, 148(sp) -; RV32-NEXT: sw zero, 136(sp) -; RV32-NEXT: sw t2, 140(sp) -; RV32-NEXT: sw zero, 128(sp) -; RV32-NEXT: sw t3, 132(sp) -; RV32-NEXT: sw zero, 120(sp) -; RV32-NEXT: sw t4, 124(sp) -; RV32-NEXT: sw zero, 112(sp) -; RV32-NEXT: sw t5, 116(sp) -; RV32-NEXT: sw zero, 104(sp) -; RV32-NEXT: sw t6, 108(sp) -; RV32-NEXT: sw zero, 96(sp) -; RV32-NEXT: sw s0, 100(sp) -; RV32-NEXT: sw zero, 88(sp) -; RV32-NEXT: sw s1, 92(sp) -; RV32-NEXT: sw zero, 80(sp) -; RV32-NEXT: sw s2, 84(sp) -; RV32-NEXT: sw zero, 72(sp) -; RV32-NEXT: sw s3, 76(sp) -; RV32-NEXT: sw zero, 64(sp) -; RV32-NEXT: sw s4, 68(sp) -; RV32-NEXT: sw zero, 56(sp) -; RV32-NEXT: sw s5, 60(sp) -; RV32-NEXT: sw zero, 48(sp) -; RV32-NEXT: sw s6, 52(sp) -; RV32-NEXT: sw zero, 40(sp) -; RV32-NEXT: sw s7, 44(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw s8, 36(sp) -; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw s9, 28(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw s10, 20(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: sw a7, 12(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v3, a0 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vmv.v.x v2, a0 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vmv.v.x v1, a0 -; RV32-NEXT: addi a0, sp, 272 -; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v0, (a0), zero -; RV32-NEXT: addi a0, sp, 264 -; RV32-NEXT: vlse64.v v13, (a0), zero -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vlse64.v v14, (a0), zero -; RV32-NEXT: addi a0, sp, 248 -; RV32-NEXT: vlse64.v v15, (a0), zero -; RV32-NEXT: addi a0, sp, 240 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: addi a0, sp, 232 -; RV32-NEXT: vlse64.v v17, (a0), zero -; RV32-NEXT: addi a0, sp, 224 -; RV32-NEXT: vlse64.v v18, (a0), zero -; RV32-NEXT: addi a0, sp, 216 -; RV32-NEXT: vlse64.v v19, (a0), zero -; RV32-NEXT: addi a0, sp, 208 -; RV32-NEXT: vlse64.v v20, (a0), zero -; RV32-NEXT: addi a0, sp, 200 -; RV32-NEXT: vlse64.v v21, (a0), zero -; RV32-NEXT: addi a0, sp, 192 -; RV32-NEXT: vlse64.v v22, (a0), zero -; RV32-NEXT: addi a0, sp, 184 -; RV32-NEXT: vlse64.v v23, (a0), zero -; RV32-NEXT: addi a0, sp, 176 -; RV32-NEXT: vlse64.v v24, (a0), zero -; 
RV32-NEXT: addi a0, sp, 168 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: addi a0, sp, 152 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: addi a0, sp, 136 -; RV32-NEXT: vlse64.v v29, (a0), zero -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: vlse64.v v30, (a0), zero -; RV32-NEXT: addi a0, sp, 120 -; RV32-NEXT: vlse64.v v31, (a0), zero -; RV32-NEXT: addi a0, sp, 112 -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: addi a0, sp, 104 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: addi a0, sp, 96 -; RV32-NEXT: vlse64.v v5, (a0), zero -; RV32-NEXT: addi a0, sp, 88 -; RV32-NEXT: vlse64.v v4, (a0), zero -; RV32-NEXT: li a6, 56 -; RV32-NEXT: vsrl.vi v27, v8, 24 -; RV32-NEXT: vsrl.vx v28, v8, a6 -; RV32-NEXT: li ra, 40 -; RV32-NEXT: vsrl.vx v7, v8, ra -; RV32-NEXT: vsll.vx v6, v8, a6 -; RV32-NEXT: addi a4, t3, -256 -; RV32-NEXT: vand.vx v7, v7, a4 -; RV32-NEXT: vor.vv v28, v7, v28 -; RV32-NEXT: vand.vx v7, v8, a4 -; RV32-NEXT: vsll.vx v7, v7, ra -; RV32-NEXT: vor.vv v7, v6, v7 -; RV32-NEXT: vsrl.vi v6, v8, 8 -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v27, v27, a5 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v6, v6, v0 -; RV32-NEXT: vor.vv v27, v6, v27 -; RV32-NEXT: addi a3, sp, 80 -; RV32-NEXT: vlse64.v v6, (a3), zero -; RV32-NEXT: vor.vv v27, v27, v28 -; RV32-NEXT: vand.vx v28, v8, a5 -; RV32-NEXT: vsll.vi v28, v28, 24 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v28, v8 -; RV32-NEXT: addi a3, sp, 72 -; RV32-NEXT: vlse64.v v28, (a3), zero -; RV32-NEXT: vor.vv v8, v7, v8 -; RV32-NEXT: addi a3, sp, 64 -; RV32-NEXT: vlse64.v v7, (a3), zero -; RV32-NEXT: vor.vv v8, v8, v27 -; RV32-NEXT: vsrl.vi v27, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v3 -; RV32-NEXT: vand.vv v27, v27, v3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v27, v8 -; RV32-NEXT: vsrl.vi v27, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v2 -; RV32-NEXT: vand.vv v27, v27, v2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v27, v8 -; RV32-NEXT: vsrl.vi v27, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v1 -; RV32-NEXT: vand.vv v27, v27, v1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v27, v8 -; 
RV32-NEXT: addi a3, sp, 56 -; RV32-NEXT: vlse64.v v27, (a3), zero -; RV32-NEXT: vand.vv v13, v8, v13 -; RV32-NEXT: vand.vv v14, v8, v14 -; RV32-NEXT: vand.vv v15, v8, v15 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vv v17, v8, v17 -; RV32-NEXT: vand.vv v18, v8, v18 -; RV32-NEXT: vand.vv v19, v8, v19 -; RV32-NEXT: vand.vv v20, v8, v20 -; RV32-NEXT: vand.vv v21, v8, v21 -; RV32-NEXT: vand.vv v22, v8, v22 -; RV32-NEXT: vand.vv v23, v8, v23 -; RV32-NEXT: vand.vv v24, v8, v24 -; RV32-NEXT: vand.vv v25, v8, v25 -; RV32-NEXT: vand.vv v26, v8, v26 -; RV32-NEXT: vand.vv v3, v8, v9 -; RV32-NEXT: vand.vv v2, v8, v10 -; RV32-NEXT: vand.vv v29, v8, v29 -; RV32-NEXT: vand.vv v30, v8, v30 -; RV32-NEXT: vand.vv v31, v8, v31 -; RV32-NEXT: vand.vv v0, v8, v11 -; RV32-NEXT: vand.vv v9, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v5, v8, v5 -; RV32-NEXT: vand.vv v4, v8, v4 -; RV32-NEXT: vand.vv v6, v8, v6 -; RV32-NEXT: vand.vv v9, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: addi a0, sp, 40 -; RV32-NEXT: vlse64.v v9, (a3), zero -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vand.vv v11, v8, v7 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v11, v8, v27 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a2), zero -; RV32-NEXT: vlse64.v v10, (a3), zero -; RV32-NEXT: vlse64.v v11, (a1), zero -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; 
RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v11 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 4 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 8 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 16 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 64 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 128 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: 
vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 256 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 512 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 1024 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s11 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t3 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t4 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; 
RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t5 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t6 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s3 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s4 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s5 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 2 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s6 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s7 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s8 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s9 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, 
(a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v1, v8, s10 -; RV32-NEXT: vmul.vv v1, v8, v1 -; RV32-NEXT: vmul.vv v9, v8, v13 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v14 -; RV32-NEXT: vmul.vv v11, v8, v15 -; RV32-NEXT: vmul.vv v12, v8, v16 -; RV32-NEXT: vmul.vv v13, v8, v17 -; RV32-NEXT: vmul.vv v14, v8, v18 -; RV32-NEXT: vmul.vv v15, v8, v19 -; RV32-NEXT: vmul.vv v16, v8, v20 -; RV32-NEXT: vmul.vv v17, v8, v21 -; RV32-NEXT: vmul.vv v18, v8, v22 -; RV32-NEXT: vmul.vv v19, v8, v23 -; RV32-NEXT: vmul.vv v20, v8, v24 -; RV32-NEXT: vmul.vv v21, v8, v25 -; RV32-NEXT: vmul.vv v22, v8, v26 -; RV32-NEXT: vmul.vv v23, v8, v3 -; RV32-NEXT: vmul.vv v24, v8, v2 -; RV32-NEXT: vmul.vv v25, v8, v29 -; RV32-NEXT: vmul.vv v26, v8, v30 -; RV32-NEXT: vmul.vv v27, v8, v31 -; RV32-NEXT: vmul.vv v28, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v29, v8, v29 -; RV32-NEXT: vmul.vv v30, v8, v5 -; RV32-NEXT: vmul.vv v31, v8, v4 -; RV32-NEXT: vmul.vv v7, v8, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v6, v8, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v5, v8, v5 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v4, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v3, v8, v3 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v2, v8, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v0, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add 
a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vi v8, v8, 0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: 
csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v 
v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 2 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vxor.vv v8, v8, v1 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: vxor.vv v8, v8, v11 -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vxor.vv v8, v8, v13 -; RV32-NEXT: vxor.vv v8, v8, v14 -; RV32-NEXT: vxor.vv v8, v8, v15 -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vxor.vv v8, v8, v17 -; RV32-NEXT: vxor.vv v8, v8, v18 -; RV32-NEXT: 
vxor.vv v8, v8, v19 -; RV32-NEXT: vxor.vv v8, v8, v20 -; RV32-NEXT: vxor.vv v8, v8, v21 -; RV32-NEXT: vxor.vv v8, v8, v22 -; RV32-NEXT: vxor.vv v8, v8, v23 -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vxor.vv v8, v8, v25 -; RV32-NEXT: vxor.vv v8, v8, v26 -; RV32-NEXT: vxor.vv v8, v8, v27 -; RV32-NEXT: vxor.vv v8, v8, v28 -; RV32-NEXT: vxor.vv v8, v8, v29 -; RV32-NEXT: vxor.vv v8, v8, v30 -; RV32-NEXT: vxor.vv v8, v8, v31 -; RV32-NEXT: vxor.vv v8, v8, v7 -; RV32-NEXT: vxor.vv v8, v8, v6 -; RV32-NEXT: vxor.vv v8, v8, v5 -; RV32-NEXT: vxor.vv v8, v8, v4 -; RV32-NEXT: vxor.vv v8, v8, v3 -; RV32-NEXT: vxor.vv v8, v8, v2 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vx v9, v8, a6 -; RV32-NEXT: vsll.vx v10, v8, a6 -; RV32-NEXT: vsrl.vx v11, v8, ra -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vand.vx v11, v11, a4 -; RV32-NEXT: vsrl.vi v13, v8, 24 -; RV32-NEXT: vand.vx v14, v8, a5 -; RV32-NEXT: vand.vx v13, v13, a5 -; RV32-NEXT: vsll.vx v12, v12, ra -; RV32-NEXT: vsrl.vi v15, v8, 8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v15, v15, v16 -; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vor.vv v11, v15, v13 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vsll.vi v13, v14, 24 -; RV32-NEXT: vor.vv v8, v13, v8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv 
v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 352 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_nxv1i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -224 -; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: li t2, 255 -; RV64-NEXT: lui t6, 61681 -; RV64-NEXT: lui s0, 209715 -; RV64-NEXT: lui s1, 349525 -; RV64-NEXT: li s10, 16 -; RV64-NEXT: li s9, 32 -; RV64-NEXT: li s8, 64 -; RV64-NEXT: li s5, 128 -; RV64-NEXT: li s6, 256 -; RV64-NEXT: li t5, 512 -; RV64-NEXT: li t3, 1024 -; RV64-NEXT: li t0, 1 -; RV64-NEXT: lui s7, 1 -; RV64-NEXT: lui a1, 2 -; RV64-NEXT: lui t4, 4 -; RV64-NEXT: lui t1, 8 -; RV64-NEXT: lui a7, 32 -; RV64-NEXT: lui a6, 64 -; RV64-NEXT: lui a5, 128 -; RV64-NEXT: lui a4, 256 -; RV64-NEXT: lui a3, 512 -; RV64-NEXT: lui a2, 1024 -; RV64-NEXT: li s11, 56 -; RV64-NEXT: vsrl.vx v11, v8, s11 -; RV64-NEXT: li ra, 40 -; RV64-NEXT: vsrl.vx v12, v8, ra -; RV64-NEXT: addi t6, t6, -241 -; RV64-NEXT: addi s2, s0, 819 -; RV64-NEXT: addi s3, s1, 1365 -; RV64-NEXT: slli s1, t6, 32 -; RV64-NEXT: add s4, t6, s1 -; RV64-NEXT: slli t6, s2, 32 -; RV64-NEXT: add s2, 
s2, t6 -; RV64-NEXT: slli t6, s3, 32 -; RV64-NEXT: add s3, s3, t6 -; RV64-NEXT: lui s0, 16 -; RV64-NEXT: addi s1, s0, -256 -; RV64-NEXT: lui a0, 4080 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: slli t6, t2, 24 -; RV64-NEXT: vand.vx v13, v8, a0 -; RV64-NEXT: vsll.vx v14, v8, s11 -; RV64-NEXT: vand.vx v12, v12, s1 -; RV64-NEXT: vand.vx v9, v9, t6 -; RV64-NEXT: vsll.vi v13, v13, 24 -; RV64-NEXT: vand.vx v15, v8, t6 -; RV64-NEXT: vand.vx v8, v8, s1 -; RV64-NEXT: vor.vv v11, v12, v11 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v15, 8 -; RV64-NEXT: vsll.vx v8, v8, ra -; RV64-NEXT: vor.vv v9, v9, v11 -; RV64-NEXT: vor.vv v10, v13, v10 -; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vand.vx v8, v8, s4 -; RV64-NEXT: vand.vx v9, v9, s4 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vand.vx v8, v8, s2 -; RV64-NEXT: vand.vx v9, v9, s2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v8, v8, s3 -; RV64-NEXT: vand.vx v9, v9, s3 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vand.vx v7, v8, s10 -; RV64-NEXT: lui t2, 4096 -; RV64-NEXT: vand.vx v6, v8, s9 -; RV64-NEXT: lui s9, 8192 -; RV64-NEXT: vand.vx v5, v8, s8 -; RV64-NEXT: lui s8, 16384 -; RV64-NEXT: vand.vx v4, v8, s5 -; RV64-NEXT: lui s10, 32768 -; RV64-NEXT: vand.vx v13, v8, s6 -; RV64-NEXT: lui s11, 65536 -; RV64-NEXT: vand.vx v14, v8, t5 -; RV64-NEXT: lui t5, 131072 -; RV64-NEXT: vand.vx v15, v8, t3 -; RV64-NEXT: slli t3, t0, 11 -; RV64-NEXT: vand.vx v16, v8, t3 -; RV64-NEXT: lui t3, 262144 -; RV64-NEXT: vand.vx v17, v8, s7 -; RV64-NEXT: slli a0, t0, 31 -; RV64-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v18, v8, a1 -; RV64-NEXT: slli a0, t0, 32 -; RV64-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v19, v8, t4 -; RV64-NEXT: slli a0, t0, 33 -; RV64-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v20, v8, t1 -; RV64-NEXT: slli a0, t0, 34 -; RV64-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v21, v8, s0 -; RV64-NEXT: slli a0, t0, 35 -; RV64-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v22, v8, a7 -; RV64-NEXT: slli a0, t0, 36 -; RV64-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v23, v8, a6 -; RV64-NEXT: slli a0, t0, 37 -; RV64-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v24, v8, a5 -; RV64-NEXT: slli a0, t0, 38 -; RV64-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v25, v8, a4 -; RV64-NEXT: slli a0, t0, 39 -; RV64-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v26, v8, a3 -; RV64-NEXT: slli a0, t0, 40 -; RV64-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v27, v8, a2 -; RV64-NEXT: slli a0, t0, 41 -; RV64-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: lui a0, 2048 -; RV64-NEXT: vand.vx v28, v8, a0 -; RV64-NEXT: slli s5, t0, 42 -; RV64-NEXT: vand.vx v29, v8, t2 -; RV64-NEXT: slli s6, t0, 43 -; RV64-NEXT: vand.vx v30, v8, s9 -; RV64-NEXT: slli s7, t0, 44 -; RV64-NEXT: vand.vx v10, v8, s8 -; RV64-NEXT: slli s8, t0, 45 -; RV64-NEXT: vand.vx v11, v8, s10 -; RV64-NEXT: slli s9, t0, 46 -; RV64-NEXT: vand.vx v12, v8, s11 -; RV64-NEXT: slli s10, t0, 47 -; RV64-NEXT: vand.vx v9, v8, t5 -; RV64-NEXT: slli s11, t0, 48 -; RV64-NEXT: vand.vx v31, v8, t3 -; RV64-NEXT: slli ra, t0, 49 -; RV64-NEXT: slli 
t5, t0, 50 -; RV64-NEXT: slli t4, t0, 51 -; RV64-NEXT: slli t3, t0, 52 -; RV64-NEXT: slli t2, t0, 53 -; RV64-NEXT: slli t1, t0, 54 -; RV64-NEXT: slli a7, t0, 55 -; RV64-NEXT: slli a6, t0, 56 -; RV64-NEXT: slli a5, t0, 57 -; RV64-NEXT: slli a4, t0, 58 -; RV64-NEXT: slli a3, t0, 59 -; RV64-NEXT: slli a2, t0, 60 -; RV64-NEXT: slli a1, t0, 61 -; RV64-NEXT: slli t0, t0, 62 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vand.vi v3, v8, 2 -; RV64-NEXT: vand.vi v2, v8, 1 -; RV64-NEXT: vand.vi v1, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v3, v8, v3 -; RV64-NEXT: sd t6, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v3, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v3, v8, v2 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v3, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v3, v8, v1 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v3, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v0, v8, v0 -; RV64-NEXT: vmul.vv v7, v8, v7 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v7, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v7, v8, v6 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v7, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v7, v8, v5 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v7, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v7, v8, v4 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v7, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v13 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v14 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; 
RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v15 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s0, t6, 4 -; RV64-NEXT: add t6, s0, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v16 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v17 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s0, t6, 4 -; RV64-NEXT: sub t6, s0, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v18 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v19 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v20 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v21 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v22 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v23 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s0, t6, 3 -; RV64-NEXT: add t6, s0, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v24 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v25 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s0, t6, 3 -; RV64-NEXT: sub t6, s0, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v26 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v27 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s0, t6, 2 -; RV64-NEXT: add t6, s0, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; 
RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v28 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v29 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s0, t6, 1 -; RV64-NEXT: add t6, s0, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v13, v8, v30 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v13, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 4 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v10, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v11 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s0, t6, 5 -; RV64-NEXT: add t6, s0, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v10, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v12 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 5 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v10, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s0, t6, 5 -; RV64-NEXT: sub t6, s0, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v31 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s0, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr s0, vlenb -; RV64-NEXT: slli s0, s0, 1 -; RV64-NEXT: mv t6, s0 -; RV64-NEXT: slli s0, s0, 2 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: slli s0, s0, 1 -; RV64-NEXT: add s0, s0, t6 -; RV64-NEXT: ld t6, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s0, sp, s0 -; RV64-NEXT: addi s0, s0, 112 -; RV64-NEXT: vs1r.v v9, (s0) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s0, 
72(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr s0, vlenb -; RV64-NEXT: add s0, sp, s0 -; RV64-NEXT: addi s0, s0, 112 -; RV64-NEXT: vs1r.v v9, (s0) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: addi s0, sp, 112 -; RV64-NEXT: vs1r.v v9, (s0) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s0, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v4, v8, v9 -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v5, v8, v9 -; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v6, v8, v9 -; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v7, v8, v9 -; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v31, v8, v9 -; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s0 -; RV64-NEXT: vmul.vv v30, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s5 -; RV64-NEXT: vmul.vv v29, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s6 -; RV64-NEXT: vmul.vv v28, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s7 -; RV64-NEXT: vmul.vv v27, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s8 -; RV64-NEXT: vmul.vv v26, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s9 -; RV64-NEXT: vmul.vv v25, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s10 -; RV64-NEXT: vmul.vv v23, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s11 -; RV64-NEXT: vmul.vv v19, v8, v9 -; RV64-NEXT: vand.vx v9, v8, ra -; RV64-NEXT: vmul.vv v14, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t5 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: vand.vx v10, v8, t4 -; RV64-NEXT: vmul.vv v24, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t3 -; RV64-NEXT: vmul.vv v22, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t2 -; RV64-NEXT: vmul.vv v20, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t1 -; RV64-NEXT: vmul.vv v15, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a7 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: vand.vx v11, v8, a6 -; RV64-NEXT: vmul.vv v16, v8, v11 -; RV64-NEXT: vand.vx v11, v8, a5 -; RV64-NEXT: vmul.vv v11, v8, v11 -; RV64-NEXT: vand.vx v12, v8, a4 -; RV64-NEXT: vmul.vv v21, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a3 -; RV64-NEXT: vmul.vv v17, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: vand.vx v13, v8, a1 -; RV64-NEXT: vmul.vv v18, v8, v13 -; RV64-NEXT: vand.vx v13, v8, t0 -; RV64-NEXT: vmul.vv v13, v8, v13 -; RV64-NEXT: vand.vx v2, v8, a0 -; RV64-NEXT: vmul.vv v8, v8, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, 
(a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v1 -; RV64-NEXT: vxor.vv v2, v2, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v2, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 4 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 4 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, 
a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 2 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v1, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v3 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsll.vx v2, v2, a0 -; RV64-NEXT: vand.vx v1, v1, s1 -; RV64-NEXT: li a1, 40 -; RV64-NEXT: vsll.vx v1, v1, a1 -; RV64-NEXT: vor.vv v2, v2, v1 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl1r.v v1, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v0, v1 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 5 -; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; 
RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 5 -; RV64-NEXT: sub a2, a3, a2 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl1r.v v0, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl1r.v v3, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v3 -; RV64-NEXT: addi a2, sp, 112 -; RV64-NEXT: vl1r.v v3, (a2) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v3, v1, v3 -; RV64-NEXT: vxor.vv v4, v3, v4 -; RV64-NEXT: vxor.vv v5, v4, v5 -; RV64-NEXT: vxor.vv v6, v5, v6 -; RV64-NEXT: vxor.vv v7, v6, v7 -; RV64-NEXT: vxor.vv v31, v7, v31 -; RV64-NEXT: vxor.vv v30, v31, v30 -; RV64-NEXT: vxor.vv v29, v30, v29 -; RV64-NEXT: vxor.vv v28, v29, v28 -; RV64-NEXT: vxor.vv v27, v28, v27 -; RV64-NEXT: vxor.vv v26, v27, v26 -; RV64-NEXT: vxor.vv v25, v26, v25 -; RV64-NEXT: vxor.vv v23, v25, v23 -; RV64-NEXT: vxor.vv v19, v23, v19 -; RV64-NEXT: vxor.vv v14, v19, v14 -; RV64-NEXT: vxor.vv v9, v14, v9 -; RV64-NEXT: vsrl.vi v14, v7, 8 -; RV64-NEXT: vand.vx v14, v14, t6 -; RV64-NEXT: vsrl.vi v19, v23, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v19, v19, a2 -; RV64-NEXT: vor.vv v14, v14, v19 -; RV64-NEXT: vxor.vv v9, v9, v24 -; RV64-NEXT: vxor.vv v9, v9, v22 -; RV64-NEXT: vxor.vv v9, v9, v20 -; RV64-NEXT: vxor.vv v9, v9, v15 -; RV64-NEXT: vxor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v7, a2 -; RV64-NEXT: vsll.vi v10, v10, 24 -; RV64-NEXT: vxor.vv v15, v9, v16 -; RV64-NEXT: vxor.vv v11, v15, v11 -; RV64-NEXT: vand.vx v15, v9, t6 -; RV64-NEXT: vsll.vi v15, v15, 8 -; RV64-NEXT: vor.vv v10, v10, v15 -; RV64-NEXT: vxor.vv v11, v11, v21 -; RV64-NEXT: vor.vv v10, v2, v10 -; RV64-NEXT: vxor.vv v11, v11, v17 -; RV64-NEXT: vxor.vv v11, v11, v12 -; RV64-NEXT: vsrl.vx v9, v9, a1 -; RV64-NEXT: vand.vx v9, v9, s1 -; RV64-NEXT: vxor.vv v11, v11, v18 -; RV64-NEXT: vxor.vv v11, 
v11, v13 -; RV64-NEXT: vxor.vv v8, v11, v8 -; RV64-NEXT: vsrl.vx v8, v8, a0 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vand.vx v8, v8, s4 -; RV64-NEXT: vand.vx v9, v9, s4 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vand.vx v8, v8, s2 -; RV64-NEXT: vand.vx v9, v9, s2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v8, v8, s3 -; RV64-NEXT: vand.vx v9, v9, s3 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 224 -; RV64-NEXT: ret - %a = call @llvm.clmulr.nxv1i64( %x, %y) - ret %a -} - -define @clmulr_nxv2i64( %x, %y) nounwind { -; RV32-LABEL: clmulr_nxv2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -352 -; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: lui s7, 1044480 -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: li a1, 1 -; RV32-NEXT: li s8, 2 -; RV32-NEXT: li s9, 4 -; RV32-NEXT: li s10, 8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: li a4, 32 -; RV32-NEXT: li a5, 64 -; RV32-NEXT: li a6, 128 -; RV32-NEXT: li s11, 256 -; RV32-NEXT: li ra, 512 -; RV32-NEXT: li a0, 1024 -; RV32-NEXT: lui a2, 1 -; RV32-NEXT: lui t0, 2 -; RV32-NEXT: lui t1, 4 -; RV32-NEXT: lui t2, 8 -; RV32-NEXT: lui t3, 16 -; RV32-NEXT: lui t4, 32 -; RV32-NEXT: lui t5, 64 -; RV32-NEXT: lui t6, 128 -; RV32-NEXT: lui s0, 256 -; RV32-NEXT: lui s1, 512 -; RV32-NEXT: lui s2, 1024 -; RV32-NEXT: lui s3, 2048 -; RV32-NEXT: lui s4, 4096 -; RV32-NEXT: lui s5, 8192 -; RV32-NEXT: lui s6, 16384 -; 
RV32-NEXT: sw s7, 272(sp) -; RV32-NEXT: lui s7, 32768 -; RV32-NEXT: sw zero, 276(sp) -; RV32-NEXT: sw a7, 264(sp) -; RV32-NEXT: sw zero, 268(sp) -; RV32-NEXT: sw zero, 256(sp) -; RV32-NEXT: sw a1, 260(sp) -; RV32-NEXT: sw zero, 248(sp) -; RV32-NEXT: sw s8, 252(sp) -; RV32-NEXT: lui s8, 65536 -; RV32-NEXT: sw zero, 240(sp) -; RV32-NEXT: sw s9, 244(sp) -; RV32-NEXT: lui s9, 131072 -; RV32-NEXT: sw zero, 232(sp) -; RV32-NEXT: sw s10, 236(sp) -; RV32-NEXT: lui s10, 262144 -; RV32-NEXT: sw zero, 224(sp) -; RV32-NEXT: sw a3, 228(sp) -; RV32-NEXT: sw zero, 216(sp) -; RV32-NEXT: sw a4, 220(sp) -; RV32-NEXT: sw zero, 208(sp) -; RV32-NEXT: sw a5, 212(sp) -; RV32-NEXT: sw zero, 200(sp) -; RV32-NEXT: sw a6, 204(sp) -; RV32-NEXT: sw zero, 192(sp) -; RV32-NEXT: sw s11, 196(sp) -; RV32-NEXT: sw zero, 184(sp) -; RV32-NEXT: sw ra, 188(sp) -; RV32-NEXT: sw zero, 176(sp) -; RV32-NEXT: sw a0, 180(sp) -; RV32-NEXT: slli a5, a1, 11 -; RV32-NEXT: sw zero, 168(sp) -; RV32-NEXT: sw a5, 172(sp) -; RV32-NEXT: sw zero, 160(sp) -; RV32-NEXT: sw a2, 164(sp) -; RV32-NEXT: sw zero, 152(sp) -; RV32-NEXT: sw t0, 156(sp) -; RV32-NEXT: sw zero, 144(sp) -; RV32-NEXT: sw t1, 148(sp) -; RV32-NEXT: sw zero, 136(sp) -; RV32-NEXT: sw t2, 140(sp) -; RV32-NEXT: sw zero, 128(sp) -; RV32-NEXT: sw t3, 132(sp) -; RV32-NEXT: sw zero, 120(sp) -; RV32-NEXT: sw t4, 124(sp) -; RV32-NEXT: sw zero, 112(sp) -; RV32-NEXT: sw t5, 116(sp) -; RV32-NEXT: sw zero, 104(sp) -; RV32-NEXT: sw t6, 108(sp) -; RV32-NEXT: sw zero, 96(sp) -; RV32-NEXT: sw s0, 100(sp) -; RV32-NEXT: sw zero, 88(sp) -; RV32-NEXT: sw s1, 92(sp) -; RV32-NEXT: sw zero, 80(sp) -; RV32-NEXT: sw s2, 84(sp) -; RV32-NEXT: sw zero, 72(sp) -; RV32-NEXT: sw s3, 76(sp) -; RV32-NEXT: sw zero, 64(sp) -; RV32-NEXT: sw s4, 68(sp) -; RV32-NEXT: sw zero, 56(sp) -; RV32-NEXT: sw s5, 60(sp) -; RV32-NEXT: sw zero, 48(sp) -; RV32-NEXT: sw s6, 52(sp) -; RV32-NEXT: sw zero, 40(sp) -; RV32-NEXT: sw s7, 44(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw s8, 36(sp) -; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw s9, 28(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw s10, 20(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: sw a7, 12(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v4, a0 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vmv.v.x v2, a0 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vmv.v.x v0, a0 -; RV32-NEXT: addi a0, sp, 272 -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v6, (a0), zero -; RV32-NEXT: addi a0, sp, 264 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: addi a0, sp, 248 -; RV32-NEXT: vlse64.v v14, (a0), zero -; RV32-NEXT: addi a0, sp, 240 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: addi a0, sp, 232 -; RV32-NEXT: vlse64.v v18, (a0), zero -; RV32-NEXT: addi a0, sp, 224 -; RV32-NEXT: vlse64.v v20, (a0), zero -; RV32-NEXT: addi a0, sp, 216 -; RV32-NEXT: vlse64.v v22, (a0), zero -; RV32-NEXT: li ra, 56 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vsrl.vx v26, v8, ra 
-; RV32-NEXT: li s11, 40 -; RV32-NEXT: vsrl.vx v28, v8, s11 -; RV32-NEXT: vsll.vx v30, v8, ra -; RV32-NEXT: addi a4, t3, -256 -; RV32-NEXT: vand.vx v28, v28, a4 -; RV32-NEXT: vor.vv v26, v28, v26 -; RV32-NEXT: vand.vx v28, v8, a4 -; RV32-NEXT: vsll.vx v28, v28, s11 -; RV32-NEXT: vor.vv v30, v30, v28 -; RV32-NEXT: vsrl.vi v28, v8, 8 -; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v24, a6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v28, v28, v6 -; RV32-NEXT: vor.vv v28, v28, v24 -; RV32-NEXT: addi a3, sp, 208 -; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: vor.vv v10, v28, v26 -; RV32-NEXT: vand.vx v26, v8, a6 -; RV32-NEXT: vsll.vi v26, v26, 24 -; RV32-NEXT: vand.vv v8, v8, v6 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v26, v8 -; RV32-NEXT: addi a3, sp, 200 -; RV32-NEXT: vlse64.v v28, (a3), zero -; RV32-NEXT: vor.vv v8, v30, v8 -; RV32-NEXT: addi a3, sp, 192 -; RV32-NEXT: vlse64.v v26, (a3), zero -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v30, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v4 -; RV32-NEXT: vand.vv v30, v30, v4 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v30, v8 -; RV32-NEXT: vsrl.vi v30, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v2, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v2 -; RV32-NEXT: vand.vv v30, v30, v2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v30, v8 -; RV32-NEXT: vsrl.vi v30, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v0, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vand.vv v30, v30, v0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v30, v8 -; RV32-NEXT: addi a3, sp, 184 -; RV32-NEXT: vlse64.v v30, (a3), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale 
x 16-byte Folded Reload -; RV32-NEXT: vand.vv v6, v8, v10 -; RV32-NEXT: vand.vv v4, v8, v12 -; RV32-NEXT: vand.vv v2, v8, v14 -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vand.vv v10, v8, v18 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v22 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v28, v8, v28 -; RV32-NEXT: addi a3, sp, 176 -; RV32-NEXT: addi a0, sp, 168 -; RV32-NEXT: vlse64.v v10, (a3), zero -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vand.vv v14, v8, v26 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v14, v8, v30 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, 
(a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a2, sp, 160 -; RV32-NEXT: addi a3, sp, 152 -; RV32-NEXT: addi a1, sp, 144 -; RV32-NEXT: addi a0, sp, 136 -; RV32-NEXT: vlse64.v v10, (a2), zero -; RV32-NEXT: vlse64.v v12, (a3), zero -; RV32-NEXT: vlse64.v v14, (a1), zero -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: addi a1, sp, 120 -; RV32-NEXT: addi a2, sp, 112 -; RV32-NEXT: addi a3, sp, 104 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vlse64.v v14, (a2), zero -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 
-; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 96 -; RV32-NEXT: addi a1, sp, 88 -; RV32-NEXT: addi a2, sp, 80 -; RV32-NEXT: addi a3, sp, 72 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vlse64.v v14, (a2), zero -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: addi a1, sp, 56 -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vlse64.v v14, (a2), zero -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: addi a2, sp, 16 
-; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vlse64.v v14, (a2), zero -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vi v10, v8, 2 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vi v10, v8, 1 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vi v10, v8, 4 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vi v10, v8, 8 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 16 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 64 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 128 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 256 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 512 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 1024 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 
16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t1 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t2 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t3 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t4 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t6 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s1 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s2 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s3 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, 
v8, s4 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s6 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s7 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s8 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s9 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s10 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v12, v8, v6 -; RV32-NEXT: vmul.vv v14, v8, v4 -; RV32-NEXT: vmul.vv v16, v8, v2 -; RV32-NEXT: vmul.vv v18, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v20, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v20, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v22, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v22, v8, v22 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v24, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v24, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; 
RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v26, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v26, v8, v26 -; RV32-NEXT: vmul.vv v28, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v30, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v30, v8, v30 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v6, v8, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v4, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v2, v8, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v0, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; 
RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, 
v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli 
a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) 
# vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vi v8, v8, 0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add 
a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 
-; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr 
a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vxor.vv v8, v8, v14 -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vxor.vv v8, v8, v18 -; RV32-NEXT: vxor.vv v8, v8, v20 -; RV32-NEXT: vxor.vv v8, v8, v22 -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vxor.vv v8, v8, v26 -; RV32-NEXT: vxor.vv v8, v8, v28 -; RV32-NEXT: vxor.vv v8, v8, v30 -; RV32-NEXT: vxor.vv v8, v8, v6 -; RV32-NEXT: vxor.vv v8, v8, v4 -; RV32-NEXT: vxor.vv v8, v8, v2 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 
3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; 
RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vx v10, v8, ra -; RV32-NEXT: vsll.vx v12, v8, ra -; RV32-NEXT: vsrl.vx v14, v8, s11 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vand.vx v14, v14, a4 -; RV32-NEXT: vsrl.vi v18, v8, 24 -; RV32-NEXT: vand.vx v20, v8, a6 -; RV32-NEXT: vand.vx v18, v18, a6 -; RV32-NEXT: vsll.vx v16, v16, s11 -; RV32-NEXT: vsrl.vi v22, v8, 8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v24, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v22, v22, v24 -; RV32-NEXT: vor.vv v10, v14, v10 -; RV32-NEXT: vor.vv v14, v22, v18 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vsll.vi v18, v20, 24 -; RV32-NEXT: vor.vv v8, v18, v8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v14, v10 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv 
v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 352 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_nxv2i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -224 -; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: li s3, 40 -; RV64-NEXT: lui s1, 16 -; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v14, v8, 24 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: li t4, 255 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 209715 -; RV64-NEXT: lui t6, 349525 -; RV64-NEXT: li t5, 16 -; RV64-NEXT: li t3, 32 -; RV64-NEXT: li t2, 64 -; RV64-NEXT: li t0, 128 -; RV64-NEXT: li t1, 256 -; RV64-NEXT: li a4, 512 -; RV64-NEXT: li a3, 1024 -; RV64-NEXT: li s0, 1 -; RV64-NEXT: lui a2, 1 -; RV64-NEXT: lui a1, 2 -; RV64-NEXT: lui a0, 4 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsrl.vx v12, v8, a7 -; RV64-NEXT: vsrl.vx v18, v8, s3 -; RV64-NEXT: addi s2, s1, -256 -; RV64-NEXT: lui s1, 4080 -; RV64-NEXT: vand.vx v16, v14, s1 -; RV64-NEXT: slli t4, t4, 24 -; RV64-NEXT: 
vand.vx v20, v8, s1 -; RV64-NEXT: vsll.vx v14, v8, a7 -; RV64-NEXT: addi a7, a5, -241 -; RV64-NEXT: addi a6, a6, 819 -; RV64-NEXT: addi a5, t6, 1365 -; RV64-NEXT: slli t6, s0, 11 -; RV64-NEXT: slli s1, s0, 31 -; RV64-NEXT: sd s1, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s1, s0, 32 -; RV64-NEXT: sd s1, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s1, s0, 33 -; RV64-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s1, s0, 34 -; RV64-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s1, s0, 35 -; RV64-NEXT: sd s1, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s1, s0, 36 -; RV64-NEXT: sd s1, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s1, a7, 32 -; RV64-NEXT: add a7, a7, s1 -; RV64-NEXT: slli s1, a6, 32 -; RV64-NEXT: add a6, a6, s1 -; RV64-NEXT: slli s1, a5, 32 -; RV64-NEXT: add a5, a5, s1 -; RV64-NEXT: slli s1, s0, 37 -; RV64-NEXT: sd s1, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v18, v18, s2 -; RV64-NEXT: vand.vx v10, v10, t4 -; RV64-NEXT: vsll.vi v20, v20, 24 -; RV64-NEXT: vand.vx v22, v8, t4 -; RV64-NEXT: vand.vx v8, v8, s2 -; RV64-NEXT: vor.vv v12, v18, v12 -; RV64-NEXT: vor.vv v10, v10, v16 -; RV64-NEXT: vsll.vi v16, v22, 8 -; RV64-NEXT: vsll.vx v8, v8, s3 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vor.vv v12, v20, v16 -; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v10, v10, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v10, v10, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v10, v10, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vand.vx v10, v8, t5 -; RV64-NEXT: slli t5, s0, 38 -; RV64-NEXT: sd t5, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t3 -; RV64-NEXT: slli t3, s0, 39 -; RV64-NEXT: sd t3, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v14, v8, t2 -; RV64-NEXT: slli t2, s0, 40 -; RV64-NEXT: sd t2, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v24, v8, t0 -; RV64-NEXT: slli t0, s0, 41 -; RV64-NEXT: sd t0, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t1 -; RV64-NEXT: slli s6, s0, 42 -; RV64-NEXT: vand.vx v18, v8, a4 -; RV64-NEXT: slli s7, s0, 43 -; RV64-NEXT: vand.vx v20, v8, a3 -; RV64-NEXT: slli s8, s0, 44 -; RV64-NEXT: vand.vx v22, v8, t6 -; RV64-NEXT: slli s9, s0, 45 -; RV64-NEXT: vand.vx v26, v8, a2 -; RV64-NEXT: slli s10, s0, 46 -; RV64-NEXT: vand.vx v28, v8, a1 -; RV64-NEXT: slli s11, s0, 47 -; RV64-NEXT: vand.vx v30, v8, a0 -; RV64-NEXT: slli ra, s0, 48 -; RV64-NEXT: slli s4, s0, 49 -; RV64-NEXT: slli s3, s0, 50 -; RV64-NEXT: slli s1, s0, 51 -; RV64-NEXT: slli t6, s0, 52 -; RV64-NEXT: slli t5, s0, 53 -; RV64-NEXT: slli t3, s0, 54 -; RV64-NEXT: slli t2, s0, 55 -; RV64-NEXT: slli t1, s0, 56 -; RV64-NEXT: slli t0, s0, 57 -; RV64-NEXT: slli a4, s0, 58 -; RV64-NEXT: slli a3, s0, 59 -; RV64-NEXT: slli a2, s0, 60 -; RV64-NEXT: slli a1, s0, 61 -; RV64-NEXT: slli s0, s0, 62 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vand.vi v6, v8, 2 -; RV64-NEXT: vand.vi v4, v8, 1 -; RV64-NEXT: vand.vi v2, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v6, v8, v6 -; RV64-NEXT: sd a5, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: 
mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v4 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v2 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v0, v8, v0 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v14 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v24 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v18 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v20 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, 
a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v22 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v26 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v28 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v30 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 8 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 16 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 32 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 64 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 128 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 256 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; 
RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 512 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 1024 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 2048 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 8192 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 16384 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 32768 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 65536 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: 
slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 131072 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 262144 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded 
Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv a5, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: ld a5, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s6 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s7 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s8 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s9 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s10 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: mv s6, s5 -; 
RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s11 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, ra -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s4 -; RV64-NEXT: vmul.vv v20, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s3 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: vand.vx v12, v8, s1 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s1, vlenb -; RV64-NEXT: slli s1, s1, 3 -; RV64-NEXT: add s1, sp, s1 -; RV64-NEXT: addi s1, s1, 112 -; RV64-NEXT: vs2r.v v12, (s1) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs2r.v v12, (t6) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t5 -; RV64-NEXT: vmul.vv v6, v8, v12 -; RV64-NEXT: vand.vx v12, v8, t3 -; RV64-NEXT: vmul.vv v22, v8, v12 -; RV64-NEXT: vand.vx v12, v8, t2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: vand.vx v14, v8, t1 -; RV64-NEXT: vmul.vv v24, v8, v14 -; RV64-NEXT: vand.vx v14, v8, t0 -; RV64-NEXT: vmul.vv v14, v8, v14 -; RV64-NEXT: vand.vx v16, v8, a4 -; RV64-NEXT: vmul.vv v4, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a3 -; RV64-NEXT: vmul.vv v2, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a2 -; RV64-NEXT: vmul.vv v26, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: addi a1, sp, 112 -; RV64-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s0 -; RV64-NEXT: vmul.vv v18, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v28, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v28 -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; 
RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, 
v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; 
RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v28, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v0, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v30, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: li a1, 56 -; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: vand.vx v0, v0, s2 -; RV64-NEXT: li a0, 40 -; RV64-NEXT: vsll.vx v0, v0, a0 -; RV64-NEXT: vor.vv v8, v8, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v0, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: 
vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v28, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v28, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, 
v0, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v30 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 112 -; RV64-NEXT: vl2r.v v30, (a2) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v30, v0, v30 -; RV64-NEXT: vxor.vv v20, v30, v20 -; RV64-NEXT: vxor.vv v10, v20, v10 -; RV64-NEXT: vsrl.vi v20, v28, 8 -; RV64-NEXT: vand.vx v20, v20, t4 -; RV64-NEXT: vsrl.vi v30, v0, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v30, v30, a2 -; RV64-NEXT: vor.vv v20, v20, v30 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 112 -; RV64-NEXT: vl2r.v v30, (a3) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v10, v10, v30 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 112 -; RV64-NEXT: vl2r.v v30, (a3) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v10, v10, v30 -; RV64-NEXT: vxor.vv v10, v10, v6 -; RV64-NEXT: vxor.vv v10, v10, v22 -; RV64-NEXT: vxor.vv v10, v10, v12 -; RV64-NEXT: vand.vx v12, v28, a2 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: vxor.vv v22, v10, v24 -; RV64-NEXT: vxor.vv v14, v22, v14 -; RV64-NEXT: vand.vx v22, v10, t4 -; RV64-NEXT: vsll.vi v22, v22, 8 -; RV64-NEXT: vor.vv v12, v12, v22 -; RV64-NEXT: vxor.vv v14, v14, v4 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vxor.vv v12, v14, v2 -; RV64-NEXT: vxor.vv v12, v12, v26 -; RV64-NEXT: vsrl.vx v10, v10, a0 -; RV64-NEXT: vand.vx v10, v10, s2 -; RV64-NEXT: addi a0, sp, 112 -; RV64-NEXT: vl2r.v v14, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v14 -; RV64-NEXT: vxor.vv v12, v12, v18 -; RV64-NEXT: vxor.vv v12, v12, v16 -; RV64-NEXT: vsrl.vx v12, v12, a1 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vor.vv v10, v20, v10 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v10, v10, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v10, v10, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v10, v10, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 216(sp) # 8-byte 
Folded Reload -; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 224 -; RV64-NEXT: ret - %a = call <vscale x 2 x i64> @llvm.clmulr.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) - ret <vscale x 2 x i64> %a -} - -define <vscale x 4 x i64> @clmulr_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) nounwind { -; RV32-LABEL: clmulr_nxv4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -352 -; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: lui s11, 1044480 -; RV32-NEXT: lui t6, 524288 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: li ra, 2 -; RV32-NEXT: li t4, 4 -; RV32-NEXT: li t2, 8 -; RV32-NEXT: li t5, 16 -; RV32-NEXT: li t3, 32 -; RV32-NEXT: li t1, 64 -; RV32-NEXT: li t0, 128 -; RV32-NEXT: li a7, 256 -; RV32-NEXT: li a6, 512 -; RV32-NEXT: li a3, 1024 -; RV32-NEXT: lui a2, 1 -; RV32-NEXT: lui a4, 2 -; RV32-NEXT: lui a1, 4 -; RV32-NEXT: lui a5, 8 -; RV32-NEXT: lui s0, 16 -; RV32-NEXT: lui s1, 32 -; RV32-NEXT: lui s2, 64 -; RV32-NEXT: lui s3, 128 -; RV32-NEXT: lui s4, 256 -; RV32-NEXT: lui s5, 512 -; RV32-NEXT: lui s6, 1024 -; RV32-NEXT: lui s7, 2048 -; RV32-NEXT: lui s8, 4096 -; RV32-NEXT: lui s9, 8192 -; RV32-NEXT: lui s10, 16384 -; RV32-NEXT: sw s11, 272(sp) -; RV32-NEXT: lui s11, 32768 -; RV32-NEXT: sw zero, 276(sp) -; RV32-NEXT: sw t6, 264(sp) -; RV32-NEXT: sw zero, 268(sp) -; RV32-NEXT: sw zero, 256(sp) -; RV32-NEXT: sw a0, 260(sp) -; RV32-NEXT: sw zero, 248(sp) -; RV32-NEXT: sw ra, 252(sp) -; RV32-NEXT: lui ra, 65536 -; RV32-NEXT: sw zero, 240(sp) -; RV32-NEXT: sw t4, 244(sp) -; RV32-NEXT: lui t4, 131072 -; RV32-NEXT: sw zero, 232(sp) -; RV32-NEXT: sw t2, 236(sp) -; RV32-NEXT: lui t2, 262144 -; RV32-NEXT: sw zero, 224(sp) -; RV32-NEXT: sw t5, 228(sp) -; RV32-NEXT: sw zero, 216(sp) -; RV32-NEXT: sw t3, 220(sp) -; RV32-NEXT: sw zero, 208(sp) -; RV32-NEXT: sw t1, 212(sp) -; RV32-NEXT: sw zero, 200(sp) -; RV32-NEXT: sw t0, 204(sp) -; RV32-NEXT: sw zero, 192(sp) -; RV32-NEXT: sw a7, 196(sp) -; RV32-NEXT: sw zero, 184(sp) -; RV32-NEXT: sw a6, 188(sp) -; RV32-NEXT: sw zero, 176(sp) -; RV32-NEXT: sw a3, 180(sp) -; RV32-NEXT: li t1, 1024 -; RV32-NEXT: slli a3, a0, 11 -; RV32-NEXT: sw zero, 168(sp) -; RV32-NEXT: sw a3, 172(sp) -; RV32-NEXT: sw zero, 160(sp) -; RV32-NEXT: sw a2, 164(sp) -; RV32-NEXT: sw zero, 152(sp) -; RV32-NEXT: sw a4, 156(sp) -;
RV32-NEXT: lui t3, 2 -; RV32-NEXT: sw zero, 144(sp) -; RV32-NEXT: sw a1, 148(sp) -; RV32-NEXT: sw zero, 136(sp) -; RV32-NEXT: sw a5, 140(sp) -; RV32-NEXT: lui t5, 8 -; RV32-NEXT: sw zero, 128(sp) -; RV32-NEXT: sw s0, 132(sp) -; RV32-NEXT: sw zero, 120(sp) -; RV32-NEXT: sw s1, 124(sp) -; RV32-NEXT: sw zero, 112(sp) -; RV32-NEXT: sw s2, 116(sp) -; RV32-NEXT: sw zero, 104(sp) -; RV32-NEXT: sw s3, 108(sp) -; RV32-NEXT: sw zero, 96(sp) -; RV32-NEXT: sw s4, 100(sp) -; RV32-NEXT: sw zero, 88(sp) -; RV32-NEXT: sw s5, 92(sp) -; RV32-NEXT: sw zero, 80(sp) -; RV32-NEXT: sw s6, 84(sp) -; RV32-NEXT: sw zero, 72(sp) -; RV32-NEXT: sw s7, 76(sp) -; RV32-NEXT: sw zero, 64(sp) -; RV32-NEXT: sw s8, 68(sp) -; RV32-NEXT: sw zero, 56(sp) -; RV32-NEXT: sw s9, 60(sp) -; RV32-NEXT: sw zero, 48(sp) -; RV32-NEXT: sw s10, 52(sp) -; RV32-NEXT: sw zero, 40(sp) -; RV32-NEXT: sw s11, 44(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw ra, 36(sp) -; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw t4, 28(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw t2, 20(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: sw t6, 12(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v28, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vmv.v.x v4, a1 -; RV32-NEXT: addi a1, sp, 272 -; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 8 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v0, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a6, 56 -; RV32-NEXT: vsrl.vi v20, v8, 24 -; RV32-NEXT: vsrl.vx v12, v8, a6 -; RV32-NEXT: li a5, 40 -; RV32-NEXT: vsrl.vx v16, v8, a5 -; RV32-NEXT: vsll.vx v24, v8, a6 -; RV32-NEXT: addi a2, s0, -256 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v16, v16, v12 -; RV32-NEXT: vand.vx v12, v8, a2 -; RV32-NEXT: vsll.vx v12, v12, a5 -; RV32-NEXT: vor.vv v12, v24, v12 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v20, v20, a4 -; RV32-NEXT: lui a7, 349525 -; RV32-NEXT: addi a7, a7, 1365 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v20, v24, v20 -; RV32-NEXT: vsetvli t0, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v24, a7 -; RV32-NEXT: vsetvli a7, zero, e64, m4, ta, ma -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: addi a7, sp, 264 -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi a7, sp, 256 -; RV32-NEXT: vlse64.v v12, (a7), zero -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v28 -; RV32-NEXT: vand.vv v16, v16, v28 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 
-; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v4 -; RV32-NEXT: vand.vv v16, v16, v4 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: addi a7, sp, 248 -; RV32-NEXT: vlse64.v v16, (a7), zero -; RV32-NEXT: vand.vv v28, v8, v20 -; RV32-NEXT: addi a7, sp, 240 -; RV32-NEXT: addi t0, sp, 232 -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vlse64.v v24, (t0), zero -; RV32-NEXT: vand.vv v4, v8, v12 -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a7, sp, 224 -; RV32-NEXT: addi t0, sp, 216 -; RV32-NEXT: addi a1, sp, 208 -; RV32-NEXT: addi a0, sp, 200 -; RV32-NEXT: vlse64.v v12, (a7), zero -; RV32-NEXT: vlse64.v v16, (t0), zero -; RV32-NEXT: vlse64.v v20, (a1), zero -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; 
RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 192 -; RV32-NEXT: addi a1, sp, 184 -; RV32-NEXT: addi a7, sp, 176 -; RV32-NEXT: addi t0, sp, 168 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vlse64.v v24, (t0), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: addi a1, sp, 152 -; RV32-NEXT: addi a7, sp, 144 -; RV32-NEXT: addi t0, sp, 136 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vlse64.v v24, (t0), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; 
RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: addi a1, sp, 120 -; RV32-NEXT: addi a7, sp, 112 -; RV32-NEXT: addi t0, sp, 104 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vlse64.v v24, (t0), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 96 -; RV32-NEXT: addi a1, sp, 88 -; RV32-NEXT: addi a7, sp, 80 -; RV32-NEXT: addi t0, sp, 72 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vlse64.v v24, (t0), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: 
addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: addi a1, sp, 56 -; RV32-NEXT: addi a7, sp, 48 -; RV32-NEXT: addi t0, sp, 40 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vlse64.v v24, (t0), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: addi t0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vlse64.v v24, (t0), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; 
RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vi v12, v8, 2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vi v12, v8, 1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vi v12, v8, 4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vi v12, v8, 8 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 16 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 64 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 128 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: 
slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 256 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 512 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: lui a0, 4 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: 
addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s7 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s8 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s9 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s10 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s11 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: 
mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, ra -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v16, v8, v28 -; RV32-NEXT: vmul.vv v20, v8, v4 -; RV32-NEXT: vmul.vv v24, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v28, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v28, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v4, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v0, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; 
RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; 
RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v 
v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: 
slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add 
a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vi v8, v8, 0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, 
sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; 
RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vxor.vv v8, v8, v20 -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vxor.vv v8, v8, v28 -; RV32-NEXT: vxor.vv v8, v8, v4 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: 
vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli 
a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, 
sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vx v12, v8, a6 -; RV32-NEXT: vsrl.vx v16, v8, a5 -; RV32-NEXT: vsrl.vi v20, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vx v20, v20, a4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 8 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a4 -; RV32-NEXT: vand.vv v24, v8, v24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: 
vsll.vx v24, v8, a6 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vsll.vx v8, v8, a5 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vor.vv v8, v8, v20 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 352 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_nxv4i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -240 -; RV64-NEXT: sd ra, 232(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 224(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 216(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 208(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 200(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 192(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 168(sp) # 8-byte Folded Spill -; RV64-NEXT: sd 
s8, 160(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 144(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: li t0, 40 -; RV64-NEXT: lui a7, 16 -; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v20, v8, 24 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: li t2, 255 -; RV64-NEXT: lui a3, 61681 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: lui a5, 349525 -; RV64-NEXT: li a2, 16 -; RV64-NEXT: li a1, 32 -; RV64-NEXT: li a0, 64 -; RV64-NEXT: li s9, 1 -; RV64-NEXT: li a6, 56 -; RV64-NEXT: vsrl.vx v16, v8, a6 -; RV64-NEXT: vsrl.vx v28, v8, t0 -; RV64-NEXT: addi t6, a7, -256 -; RV64-NEXT: lui a7, 4080 -; RV64-NEXT: vand.vx v24, v20, a7 -; RV64-NEXT: slli t2, t2, 24 -; RV64-NEXT: vand.vx v4, v8, a7 -; RV64-NEXT: vsll.vx v20, v8, a6 -; RV64-NEXT: addi a7, a3, -241 -; RV64-NEXT: addi a6, a4, 819 -; RV64-NEXT: addi a5, a5, 1365 -; RV64-NEXT: slli a3, s9, 11 -; RV64-NEXT: sd a3, 112(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 31 -; RV64-NEXT: sd a3, 104(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 32 -; RV64-NEXT: sd a3, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 33 -; RV64-NEXT: sd a3, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 34 -; RV64-NEXT: sd a3, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 35 -; RV64-NEXT: sd a3, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 36 -; RV64-NEXT: sd a3, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 37 -; RV64-NEXT: sd a3, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 38 -; RV64-NEXT: sd a3, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 39 -; RV64-NEXT: sd a3, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 40 -; RV64-NEXT: sd a3, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 41 -; RV64-NEXT: sd a3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s6, s9, 42 -; RV64-NEXT: slli s7, s9, 43 -; RV64-NEXT: slli a3, a7, 32 -; RV64-NEXT: add a7, a7, a3 -; RV64-NEXT: slli a3, a6, 32 -; RV64-NEXT: add a6, a6, a3 -; RV64-NEXT: slli a3, a5, 32 -; RV64-NEXT: add a5, a5, a3 -; RV64-NEXT: slli s8, s9, 44 -; RV64-NEXT: vand.vx v28, v28, t6 -; RV64-NEXT: vand.vx v12, v12, t2 -; RV64-NEXT: vsll.vi v4, v4, 24 -; RV64-NEXT: vand.vx v0, v8, t2 -; RV64-NEXT: vand.vx v8, v8, t6 -; RV64-NEXT: vor.vv v16, v28, v16 -; RV64-NEXT: vor.vv v12, v12, v24 -; RV64-NEXT: vsll.vi v24, v0, 8 -; RV64-NEXT: vsll.vx v8, v8, t0 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vor.vv v16, v4, v24 -; RV64-NEXT: vor.vv v8, v20, v8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v12, v12, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v12, v12, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v12, v12, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vand.vx v12, v8, a2 -; RV64-NEXT: slli s10, s9, 45 -; 
RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: slli s11, s9, 46 -; RV64-NEXT: vand.vx v20, v8, a0 -; RV64-NEXT: slli ra, s9, 47 -; RV64-NEXT: slli s4, s9, 48 -; RV64-NEXT: slli s3, s9, 49 -; RV64-NEXT: slli s2, s9, 50 -; RV64-NEXT: slli s1, s9, 51 -; RV64-NEXT: slli s0, s9, 52 -; RV64-NEXT: slli t5, s9, 53 -; RV64-NEXT: slli t4, s9, 54 -; RV64-NEXT: slli t3, s9, 55 -; RV64-NEXT: slli t1, s9, 56 -; RV64-NEXT: slli t0, s9, 57 -; RV64-NEXT: slli a4, s9, 58 -; RV64-NEXT: slli a3, s9, 59 -; RV64-NEXT: slli a2, s9, 60 -; RV64-NEXT: slli a1, s9, 61 -; RV64-NEXT: slli s9, s9, 62 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vand.vi v24, v8, 2 -; RV64-NEXT: vand.vi v28, v8, 1 -; RV64-NEXT: vand.vi v4, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v24, v8, v24 -; RV64-NEXT: vmul.vv v28, v8, v28 -; RV64-NEXT: sd a5, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v28, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v28, v8, v4 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v28, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v0, v8, v0 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v20 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: li s5, 128 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: li s5, 256 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; 
RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: li s5, 512 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: li s5, 1024 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 112(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 1 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 6 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 2 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 4 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 8 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 16 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; 
RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 32 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 64 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 128 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 256 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 512 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 1024 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 2048 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 
-; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 8192 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 16384 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 32768 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 65536 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 7 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 131072 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 262144 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 104(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; 
RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, 
v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv a5, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: ld a5, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s7 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s8 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s10 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s11 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, ra -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s4 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s4, vlenb -; RV64-NEXT: slli s4, s4, 3 -; RV64-NEXT: mv s5, s4 -; RV64-NEXT: slli s4, s4, 2 -; RV64-NEXT: add s4, s4, s5 -; RV64-NEXT: add s4, sp, s4 -; RV64-NEXT: addi s4, s4, 128 -; RV64-NEXT: vs4r.v v12, (s4) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s3, vlenb -; RV64-NEXT: slli s3, s3, 2 -; RV64-NEXT: mv s4, s3 -; RV64-NEXT: slli s3, s3, 2 -; RV64-NEXT: add s3, s3, s4 -; RV64-NEXT: add s3, sp, s3 -; RV64-NEXT: addi s3, s3, 128 -; RV64-NEXT: vs4r.v v12, (s3) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s2 -; RV64-NEXT: vmul.vv v4, v8, v12 -; RV64-NEXT: vand.vx v12, v8, s1 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s1, vlenb -; RV64-NEXT: slli s1, s1, 2 -; RV64-NEXT: mv s2, s1 -; RV64-NEXT: slli s1, s1, 2 -; RV64-NEXT: add s2, s2, s1 -; RV64-NEXT: slli s1, s1, 1 -; RV64-NEXT: add s1, s1, s2 -; RV64-NEXT: add s1, sp, s1 -; RV64-NEXT: addi s1, s1, 128 -; RV64-NEXT: vs4r.v v12, (s1) 
# vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s0 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s0, vlenb -; RV64-NEXT: slli s0, s0, 2 -; RV64-NEXT: mv s1, s0 -; RV64-NEXT: slli s0, s0, 1 -; RV64-NEXT: add s1, s1, s0 -; RV64-NEXT: slli s0, s0, 2 -; RV64-NEXT: add s0, s0, s1 -; RV64-NEXT: add s0, sp, s0 -; RV64-NEXT: addi s0, s0, 128 -; RV64-NEXT: vs4r.v v12, (s0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t5, vlenb -; RV64-NEXT: slli t5, t5, 2 -; RV64-NEXT: mv s0, t5 -; RV64-NEXT: slli t5, t5, 3 -; RV64-NEXT: add t5, t5, s0 -; RV64-NEXT: add t5, sp, t5 -; RV64-NEXT: addi t5, t5, 128 -; RV64-NEXT: vs4r.v v12, (t5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t4 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t4, vlenb -; RV64-NEXT: slli t4, t4, 4 -; RV64-NEXT: add t4, sp, t4 -; RV64-NEXT: addi t4, t4, 128 -; RV64-NEXT: vs4r.v v12, (t4) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t3, vlenb -; RV64-NEXT: slli t3, t3, 2 -; RV64-NEXT: add t3, sp, t3 -; RV64-NEXT: addi t3, t3, 128 -; RV64-NEXT: vs4r.v v12, (t3) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t1 -; RV64-NEXT: vmul.vv v20, v8, v12 -; RV64-NEXT: vand.vx v12, v8, t0 -; RV64-NEXT: vmul.vv v16, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a4 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 5 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 128 -; RV64-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: mv a4, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add a4, a4, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vs4r.v v12, (a3) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vs4r.v v12, (a2) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv a2, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s9 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a0 -; RV64-NEXT: vmul.vv v8, v8, v12 -; RV64-NEXT: addi a0, sp, 128 -; RV64-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; 
RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v12 -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, 
v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; 
RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v12, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: li a1, 56 -; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: vand.vx v12, v12, t6 -; RV64-NEXT: li a0, 40 -; RV64-NEXT: vsll.vx v12, v12, a0 -; RV64-NEXT: vor.vv v12, v8, v12 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v8, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v0, v8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 7 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb 
-; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload 
-; RV64-NEXT: vxor.vv v0, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 6 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v24, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v0, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v28, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v28 -; RV64-NEXT: vxor.vv v24, v24, v4 -; RV64-NEXT: vsrl.vi v4, v8, 8 -; RV64-NEXT: vand.vx v4, v4, t2 -; RV64-NEXT: vsrl.vi v0, v0, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v0, v0, a2 -; RV64-NEXT: vor.vv v4, v4, v0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: mv a4, a3 -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: add a4, a4, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl4r.v v0, (a3) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: mv a4, a3 -; RV64-NEXT: slli a3, 
a3, 1 -; RV64-NEXT: add a4, a4, a3 -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl4r.v v0, (a3) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: mv a4, a3 -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl4r.v v0, (a3) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl4r.v v28, (a3) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v28 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl4r.v v28, (a3) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v28 -; RV64-NEXT: vxor.vv v20, v24, v20 -; RV64-NEXT: vxor.vv v16, v20, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vand.vx v20, v24, t2 -; RV64-NEXT: vsll.vi v20, v20, 8 -; RV64-NEXT: vor.vv v8, v8, v20 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v20, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v20 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v12, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v16, v12 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl4r.v v16, (a2) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v16 -; RV64-NEXT: vsrl.vx v16, v24, a0 -; RV64-NEXT: vand.vx v16, v16, t6 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v20 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v20 -; RV64-NEXT: addi a0, sp, 128 -; RV64-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v12, v12, v20 -; RV64-NEXT: vsrl.vx v12, v12, a1 -; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vor.vv v12, v4, v12 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v12, v12, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v12, v12, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v12, v12, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; 
RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 232(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 224(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 216(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 208(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 200(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 192(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 168(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 160(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 240 -; RV64-NEXT: ret - %a = call <vscale x 4 x i64> @llvm.clmulr.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) - ret <vscale x 4 x i64> %a -} - -define <vscale x 8 x i64> @clmulr_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) nounwind { -; RV32-LABEL: clmulr_nxv8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -352 -; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: lui s11, 1044480 -; RV32-NEXT: lui s0, 524288 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: li ra, 2 -; RV32-NEXT: li t4, 4 -; RV32-NEXT: li t2, 8 -; RV32-NEXT: li t6, 16 -; RV32-NEXT: li t5, 32 -; RV32-NEXT: li t3, 64 -; RV32-NEXT: li t1, 128 -; RV32-NEXT: li t0, 256 -; RV32-NEXT: li a7, 512 -; RV32-NEXT: li a6, 1024 -; RV32-NEXT: lui a4, 1 -; RV32-NEXT: lui a3, 2 -; RV32-NEXT: lui a2, 4 -; RV32-NEXT: lui a5, 8 -; RV32-NEXT: lui s1, 16 -; RV32-NEXT: lui a1, 32 -; RV32-NEXT: lui s2, 64 -; RV32-NEXT: lui s3, 128 -; RV32-NEXT: lui s4, 256 -; RV32-NEXT: lui s5, 512 -; RV32-NEXT: lui s6, 1024 -; RV32-NEXT: lui s7, 2048 -; RV32-NEXT: lui s8, 4096 -; RV32-NEXT: lui s9, 8192 -; RV32-NEXT: lui s10, 16384 -; RV32-NEXT: sw s11, 272(sp) -; RV32-NEXT: lui s11, 32768 -; RV32-NEXT: sw zero, 276(sp) -; RV32-NEXT: sw s0, 264(sp) -; RV32-NEXT: sw zero, 268(sp) -; RV32-NEXT: sw zero, 256(sp) -; RV32-NEXT: sw a0, 260(sp) -; RV32-NEXT: sw zero, 248(sp) -; RV32-NEXT: sw ra, 252(sp) -; RV32-NEXT: lui ra, 65536 -; RV32-NEXT: sw zero, 240(sp) -; RV32-NEXT: sw t4, 244(sp) -; RV32-NEXT: lui t4, 131072 -; RV32-NEXT: sw zero, 232(sp) -; RV32-NEXT: sw t2, 236(sp) -; RV32-NEXT: lui t2, 262144 -; RV32-NEXT: sw zero, 224(sp) -; RV32-NEXT: sw t6, 228(sp) -; RV32-NEXT: sw zero, 216(sp) -; RV32-NEXT: sw t5, 220(sp) -; RV32-NEXT: sw zero, 208(sp) -; RV32-NEXT: sw t3, 212(sp) -; RV32-NEXT: sw zero, 200(sp) -; RV32-NEXT: sw t1, 204(sp) -; RV32-NEXT: sw zero, 192(sp) -; RV32-NEXT: sw t0, 196(sp) -; RV32-NEXT: sw zero, 184(sp) -;
RV32-NEXT: sw a7, 188(sp) -; RV32-NEXT: sw zero, 176(sp) -; RV32-NEXT: sw a6, 180(sp) -; RV32-NEXT: li t1, 1024 -; RV32-NEXT: slli t6, a0, 11 -; RV32-NEXT: sw zero, 168(sp) -; RV32-NEXT: sw t6, 172(sp) -; RV32-NEXT: sw zero, 160(sp) -; RV32-NEXT: sw a4, 164(sp) -; RV32-NEXT: sw zero, 152(sp) -; RV32-NEXT: sw a3, 156(sp) -; RV32-NEXT: lui t3, 2 -; RV32-NEXT: sw zero, 144(sp) -; RV32-NEXT: sw a2, 148(sp) -; RV32-NEXT: lui t5, 4 -; RV32-NEXT: sw zero, 136(sp) -; RV32-NEXT: sw a5, 140(sp) -; RV32-NEXT: lui a4, 8 -; RV32-NEXT: sw zero, 128(sp) -; RV32-NEXT: sw s1, 132(sp) -; RV32-NEXT: sw zero, 120(sp) -; RV32-NEXT: sw a1, 124(sp) -; RV32-NEXT: sw zero, 112(sp) -; RV32-NEXT: sw s2, 116(sp) -; RV32-NEXT: sw zero, 104(sp) -; RV32-NEXT: sw s3, 108(sp) -; RV32-NEXT: sw zero, 96(sp) -; RV32-NEXT: sw s4, 100(sp) -; RV32-NEXT: sw zero, 88(sp) -; RV32-NEXT: sw s5, 92(sp) -; RV32-NEXT: sw zero, 80(sp) -; RV32-NEXT: sw s6, 84(sp) -; RV32-NEXT: sw zero, 72(sp) -; RV32-NEXT: sw s7, 76(sp) -; RV32-NEXT: sw zero, 64(sp) -; RV32-NEXT: sw s8, 68(sp) -; RV32-NEXT: sw zero, 56(sp) -; RV32-NEXT: sw s9, 60(sp) -; RV32-NEXT: sw zero, 48(sp) -; RV32-NEXT: sw s10, 52(sp) -; RV32-NEXT: sw zero, 40(sp) -; RV32-NEXT: sw s11, 44(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw ra, 36(sp) -; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw t4, 28(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw t2, 20(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: sw s0, 12(sp) -; RV32-NEXT: li a6, 56 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a6 -; RV32-NEXT: li a5, 40 -; RV32-NEXT: vsrl.vx v24, v8, a5 -; RV32-NEXT: vsll.vx v0, v8, a6 -; RV32-NEXT: addi a2, s1, -256 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: vsll.vx v24, v24, a5 -; RV32-NEXT: vor.vv v16, v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a3, sp, 272 -; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v16, v0, a3 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vmv8r.v v0, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v24, v24, v8 -; RV32-NEXT: vor.vv v24, v24, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v8, (a0) # 
vscale x 64-byte Folded Reload -; RV32-NEXT: vor.vv v16, v24, v8 -; RV32-NEXT: vand.vx v24, v0, a3 -; RV32-NEXT: vsll.vi v24, v24, 24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v0, v0, v8 -; RV32-NEXT: vsll.vi v0, v0, 8 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: lui a7, 61681 -; RV32-NEXT: addi a7, a7, -241 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsetvli t0, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a7 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: lui a7, 209715 -; RV32-NEXT: addi a7, a7, 819 -; RV32-NEXT: vsetvli t0, zero, e64, m8, ta, ma -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsetvli t0, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a7 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vsetvli a7, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v16, v16, v8 -; RV32-NEXT: lui a7, 349525 -; RV32-NEXT: addi a7, a7, 1365 -; RV32-NEXT: vsetvli t0, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a7 -; RV32-NEXT: vsetvli a7, zero, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 1 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vmv8r.v v0, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 9 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a7, sp, 264 -; RV32-NEXT: vlse64.v v24, (a7), zero -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v16, v16, v16 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a7, sp, 256 -; RV32-NEXT: addi t0, sp, 248 -; RV32-NEXT: addi a1, sp, 240 -; RV32-NEXT: addi a0, sp, 232 -; RV32-NEXT: vlse64.v v16, (a7), zero -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: slli a7, a7, 4 -; RV32-NEXT: mv s0, a7 -; RV32-NEXT: slli a7, 
a7, 1 -; RV32-NEXT: add s0, s0, a7 -; RV32-NEXT: slli a7, a7, 1 -; RV32-NEXT: add s0, s0, a7 -; RV32-NEXT: slli a7, a7, 1 -; RV32-NEXT: add s0, s0, a7 -; RV32-NEXT: slli a7, a7, 1 -; RV32-NEXT: add a7, a7, s0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 288 -; RV32-NEXT: vs8r.v v16, (a7) # vscale x 64-byte Folded Spill -; RV32-NEXT: vlse64.v v24, (t0), zero -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, 
(a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a0, sp, 224 -; RV32-NEXT: addi a1, sp, 216 -; RV32-NEXT: addi a7, sp, 208 -; RV32-NEXT: addi t0, sp, 200 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add s0, s0, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s0, s0, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vlse64.v v0, (a7), zero -; RV32-NEXT: vlse64.v v16, (t0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a0, sp, 192 -; RV32-NEXT: addi a1, sp, 184 -; RV32-NEXT: addi a7, sp, 176 -; RV32-NEXT: addi t0, sp, 168 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s0, s0, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add s0, s0, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vlse64.v v0, (a7), zero -; RV32-NEXT: vlse64.v v16, (t0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: addi a1, sp, 152 -; RV32-NEXT: addi a7, sp, 144 -; 
RV32-NEXT: addi t0, sp, 136 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add s0, s0, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vlse64.v v0, (a7), zero -; RV32-NEXT: vlse64.v v16, (t0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: addi a1, sp, 120 -; RV32-NEXT: addi a7, sp, 112 -; RV32-NEXT: addi t0, sp, 104 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s0, s0, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s0, s0, a0 -; 
RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, s0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vlse64.v v0, (a7), zero -; RV32-NEXT: vlse64.v v16, (t0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a0, sp, 96 -; RV32-NEXT: addi a1, sp, 88 -; RV32-NEXT: addi a7, sp, 80 -; RV32-NEXT: addi t0, sp, 72 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add s0, s0, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, s0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vlse64.v v24, (a1), zero -; 
RV32-NEXT: vlse64.v v0, (a7), zero -; RV32-NEXT: vlse64.v v16, (t0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: addi a1, sp, 56 -; RV32-NEXT: addi a7, sp, 48 -; RV32-NEXT: addi t0, sp, 40 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s0, s0, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, s0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vlse64.v v0, (a7), zero -; RV32-NEXT: vlse64.v v16, (t0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 
-; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: addi t0, sp, 8 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, s0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: vlse64.v v0, (a7), zero -; RV32-NEXT: vlse64.v v16, (t0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 8 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vv 
v16, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vi v16, v8, 2 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vi v16, v8, 1 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vi v16, v8, 4 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vi v16, v8, 8 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: li a0, 16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 
64-byte Folded Spill -; RV32-NEXT: li a0, 64 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: li a0, 128 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: li a0, 256 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: li a0, 512 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t1 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t6 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t3 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t5 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; 
RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s1 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: lui a0, 32 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s2 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s3 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s4 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s5 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s6 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s7 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s8 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s9 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; 
RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s10 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, s11 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, ra -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t4 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vand.vx v16, v8, t2 -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v24, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v0, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli 
a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 
64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, 
a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; 
RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 
1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 8 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v16, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; 
RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 8 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vi v8, v8, 0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 
-; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; 
RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded 
Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, 
vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 
-; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr 
a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 8 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a5 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vsrl.vx v24, v8, a6 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v0, v0, v16 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vsll.vi v0, v0, 8 -; RV32-NEXT: vand.vx v16, v8, a3 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vsll.vx v0, v8, a6 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vsll.vx v8, v8, a5 -; RV32-NEXT: vor.vv v8, v0, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: 
vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 9 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 352 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_nxv8i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -240 -; RV64-NEXT: sd ra, 232(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 224(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 216(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 208(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 200(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 192(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 168(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 160(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 144(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: li a1, 56 -; RV64-NEXT: li a2, 40 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v24, v8, 24 -; RV64-NEXT: vsrl.vx v16, v8, a1 -; RV64-NEXT: li a5, 56 -; RV64-NEXT: vsrl.vx v0, v8, a2 -; RV64-NEXT: li s5, 40 -; RV64-NEXT: addi s4, a3, -256 -; RV64-NEXT: vand.vx v0, v0, s4 -; RV64-NEXT: vor.vv v16, v0, v16 -; RV64-NEXT: vsrl.vi v0, v8, 8 -; RV64-NEXT: li a4, 255 -; RV64-NEXT: lui a1, 61681 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: li a0, 1 -; RV64-NEXT: lui a6, 4080 -; RV64-NEXT: vand.vx v24, v24, a6 -; RV64-NEXT: slli a4, a4, 24 -; RV64-NEXT: vand.vx v0, v0, a4 -; RV64-NEXT: vor.vv v24, v0, v24 -; RV64-NEXT: vand.vx v0, v8, a6 -; RV64-NEXT: vsll.vi v0, v0, 24 -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vand.vx v24, v8, a4 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vor.vv v24, v0, v24 -; RV64-NEXT: vsll.vx v0, v8, a5 -; RV64-NEXT: addi a7, a1, -241 -; RV64-NEXT: addi a6, a2, 819 -; RV64-NEXT: addi a5, a3, 1365 -; 
RV64-NEXT: slli a1, a0, 11 -; RV64-NEXT: sd a1, 112(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 31 -; RV64-NEXT: sd a1, 104(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: sd a1, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 33 -; RV64-NEXT: sd a1, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 34 -; RV64-NEXT: sd a1, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 35 -; RV64-NEXT: sd a1, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 36 -; RV64-NEXT: sd a1, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 37 -; RV64-NEXT: sd a1, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 38 -; RV64-NEXT: sd a1, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 39 -; RV64-NEXT: sd a1, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 40 -; RV64-NEXT: sd a1, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a1, a0, 41 -; RV64-NEXT: sd a1, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s6, a0, 42 -; RV64-NEXT: slli s7, a0, 43 -; RV64-NEXT: slli s8, a0, 44 -; RV64-NEXT: slli s9, a0, 45 -; RV64-NEXT: slli s10, a0, 46 -; RV64-NEXT: slli a1, a7, 32 -; RV64-NEXT: add a7, a7, a1 -; RV64-NEXT: slli a1, a6, 32 -; RV64-NEXT: add a6, a6, a1 -; RV64-NEXT: slli a1, a5, 32 -; RV64-NEXT: add a5, a5, a1 -; RV64-NEXT: slli s11, a0, 47 -; RV64-NEXT: slli ra, a0, 48 -; RV64-NEXT: slli s3, a0, 49 -; RV64-NEXT: slli s2, a0, 50 -; RV64-NEXT: slli s1, a0, 51 -; RV64-NEXT: slli s0, a0, 52 -; RV64-NEXT: slli t6, a0, 53 -; RV64-NEXT: slli t5, a0, 54 -; RV64-NEXT: slli t4, a0, 55 -; RV64-NEXT: slli t3, a0, 56 -; RV64-NEXT: slli t2, a0, 57 -; RV64-NEXT: slli t1, a0, 58 -; RV64-NEXT: slli t0, a0, 59 -; RV64-NEXT: slli a3, a0, 60 -; RV64-NEXT: slli a2, a0, 61 -; RV64-NEXT: slli a1, a0, 62 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vand.vx v8, v8, s4 -; RV64-NEXT: vsll.vx v8, v8, s5 -; RV64-NEXT: vor.vv v8, v0, v8 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v16, v16, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v16, v16, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v16, v16, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vand.vi v16, v8, 2 -; RV64-NEXT: vand.vi v24, v8, 1 -; RV64-NEXT: vand.vi v0, v8, 4 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: sd a1, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v24 -; RV64-NEXT: vmul.vv v0, v8, v0 -; RV64-NEXT: vand.vi v16, v8, 8 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 
128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: li s5, 16 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: li s5, 32 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: li s5, 64 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: li s5, 128 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: li s5, 256 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: li s5, 512 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: li s5, 1024 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 112(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 
-; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 1 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 2 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 4 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 8 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 7 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 16 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 32 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 64 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 128 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; 
RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 256 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 512 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 1024 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 2048 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 8192 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 16384 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 32768 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, 
sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 65536 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 131072 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: lui s5, 262144 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 104(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 8 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: 
ld s5, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: mv s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: mv a1, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add a1, a1, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, s5, a1 -; RV64-NEXT: ld a1, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s6 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s7 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 5 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s8 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s5, 
vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s9 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s10 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s11 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, ra -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs8r.v v16, (s5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s3 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s3, vlenb -; RV64-NEXT: slli s3, s3, 3 -; RV64-NEXT: mv s5, s3 -; RV64-NEXT: slli s3, s3, 3 -; RV64-NEXT: add s3, s3, s5 -; RV64-NEXT: add s3, sp, s3 -; RV64-NEXT: addi s3, s3, 128 -; RV64-NEXT: vs8r.v v16, (s3) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s2 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s2, vlenb -; RV64-NEXT: slli s2, s2, 4 -; RV64-NEXT: mv s3, s2 -; RV64-NEXT: slli s2, s2, 1 -; RV64-NEXT: add s2, s2, s3 -; RV64-NEXT: add s2, sp, s2 -; RV64-NEXT: addi s2, s2, 128 -; RV64-NEXT: vs8r.v v16, (s2) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s1 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s1, vlenb -; RV64-NEXT: slli s1, s1, 7 -; RV64-NEXT: add s1, sp, s1 -; RV64-NEXT: addi s1, s1, 128 -; RV64-NEXT: vs8r.v v16, (s1) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, s0 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr s0, vlenb -; RV64-NEXT: slli s0, s0, 4 -; RV64-NEXT: mv s1, s0 -; RV64-NEXT: slli s0, s0, 1 -; RV64-NEXT: add s1, s1, s0 -; RV64-NEXT: slli s0, s0, 1 -; RV64-NEXT: add s0, s0, s1 -; RV64-NEXT: add s0, sp, s0 -; RV64-NEXT: addi s0, s0, 128 -; RV64-NEXT: vs8r.v v16, (s0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t6 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 5 -; RV64-NEXT: mv s0, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s0 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 128 -; RV64-NEXT: vs8r.v v16, (t6) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t5 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr t5, vlenb -; RV64-NEXT: slli 
t5, t5, 6 -; RV64-NEXT: add t5, sp, t5 -; RV64-NEXT: addi t5, t5, 128 -; RV64-NEXT: vs8r.v v16, (t5) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t4 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr t4, vlenb -; RV64-NEXT: slli t4, t4, 3 -; RV64-NEXT: mv t5, t4 -; RV64-NEXT: slli t4, t4, 2 -; RV64-NEXT: add t4, t4, t5 -; RV64-NEXT: add t4, sp, t4 -; RV64-NEXT: addi t4, t4, 128 -; RV64-NEXT: vs8r.v v16, (t4) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t3 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr t3, vlenb -; RV64-NEXT: slli t3, t3, 3 -; RV64-NEXT: mv t4, t3 -; RV64-NEXT: slli t3, t3, 1 -; RV64-NEXT: add t3, t3, t4 -; RV64-NEXT: add t3, sp, t3 -; RV64-NEXT: addi t3, t3, 128 -; RV64-NEXT: vs8r.v v16, (t3) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t2 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr t2, vlenb -; RV64-NEXT: slli t2, t2, 3 -; RV64-NEXT: add t2, sp, t2 -; RV64-NEXT: addi t2, t2, 128 -; RV64-NEXT: vs8r.v v16, (t2) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t1 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr t1, vlenb -; RV64-NEXT: slli t1, t1, 3 -; RV64-NEXT: mv t2, t1 -; RV64-NEXT: slli t1, t1, 1 -; RV64-NEXT: add t2, t2, t1 -; RV64-NEXT: slli t1, t1, 2 -; RV64-NEXT: add t1, t1, t2 -; RV64-NEXT: add t1, sp, t1 -; RV64-NEXT: addi t1, t1, 128 -; RV64-NEXT: vs8r.v v16, (t1) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t0 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr t0, vlenb -; RV64-NEXT: slli t0, t0, 4 -; RV64-NEXT: mv t1, t0 -; RV64-NEXT: slli t0, t0, 2 -; RV64-NEXT: add t0, t0, t1 -; RV64-NEXT: add t0, sp, t0 -; RV64-NEXT: addi t0, t0, 128 -; RV64-NEXT: vs8r.v v16, (t0) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, a3 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: mv t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add t0, t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add a3, a3, t0 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vs8r.v v16, (a3) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, a2 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vmul.vv v16, v8, v16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 128 -; RV64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, a0 -; RV64-NEXT: vmul.vv v8, v8, v16 -; RV64-NEXT: addi a0, sp, 128 -; RV64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v24, v8 -; RV64-NEXT: vxor.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; 
RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, 
a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 7 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; 
RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v16, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsll.vx v8, v8, a0 -; RV64-NEXT: vand.vx v16, v16, s4 -; RV64-NEXT: li a1, 40 -; RV64-NEXT: vsll.vx v16, v16, a1 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v24, v8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v 
v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 8 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 6 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; 
RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v16, v8, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v16, v8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v8, v24 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: 
vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: vsrl.vi v0, v16, 8 -; RV64-NEXT: vand.vx v0, v0, a4 -; RV64-NEXT: vsrl.vi v8, v8, 24 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vor.vv v8, v0, v8 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 5 -; RV64-NEXT: mv t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add t0, t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add t0, t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add a3, a3, t0 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vs8r.v v8, (a3) # vscale x 64-byte Folded Spill -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 7 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl8r.v v8, (a3) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v24, v8 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 -; RV64-NEXT: mv t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add t0, t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add a3, a3, t0 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 5 -; RV64-NEXT: mv t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add a3, a3, t0 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 6 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: mv t0, a3 -; RV64-NEXT: slli a3, a3, 2 -; RV64-NEXT: add a3, a3, t0 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v8, v8, v24 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: mv t0, a3 -; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: add a3, a3, t0 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl8r.v v24, (a3) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v8, v24 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vl8r.v v0, (a3) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: vand.vx v16, v16, a2 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: vand.vx v0, v8, a4 -; RV64-NEXT: vsll.vi v0, v0, 8 -; RV64-NEXT: vor.vv v16, v16, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: 
add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vor.vv v16, v0, v16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 2 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: mv a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a3, a2 -; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 5 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: addi a2, sp, 128 -; RV64-NEXT: vl8r.v v0, (a2) # vscale x 64-byte Folded Reload -; RV64-NEXT: vxor.vv v24, v24, v0 -; RV64-NEXT: vsrl.vx v8, v8, a1 -; RV64-NEXT: vand.vx v8, v8, s4 -; RV64-NEXT: vsrl.vx v24, v24, a0 -; RV64-NEXT: vor.vv v8, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload -; RV64-NEXT: vor.vv v8, v24, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v16, v16, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v16, v16, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v16, v16, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 232(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 224(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 216(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 208(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 200(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 192(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 168(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 160(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 136(sp) # 8-byte Folded Reload -; 
RV64-NEXT: addi sp, sp, 240 -; RV64-NEXT: ret - %a = call @llvm.clmulr.nxv8i64( %x, %y) - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll index 1c00086064133..56379e0b55e10 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll @@ -4627,14740 +4627,3 @@ define <8 x i64> @clmul_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { %a = call <8 x i64> @llvm.clmul.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %a } - -define <1 x i32> @clmulr_v1i32(<1 x i32> %x, <1 x i32> %y) nounwind { -; CHECK-LABEL: clmulr_v1i32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 8 -; CHECK-NEXT: lui a4, 16 -; CHECK-NEXT: vsrl.vi v10, v8, 24 -; CHECK-NEXT: vsll.vi v11, v8, 24 -; CHECK-NEXT: lui a0, 61681 -; CHECK-NEXT: lui a1, 209715 -; CHECK-NEXT: lui a5, 349525 -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: addi a3, a4, -256 -; CHECK-NEXT: addi a2, a0, -241 -; CHECK-NEXT: addi a1, a1, 819 -; CHECK-NEXT: addi a0, a5, 1365 -; CHECK-NEXT: vand.vx v9, v9, a3 -; CHECK-NEXT: vand.vx v8, v8, a3 -; CHECK-NEXT: vor.vv v9, v9, v10 -; CHECK-NEXT: vsll.vi v8, v8, 8 -; CHECK-NEXT: vor.vv v8, v11, v8 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vand.vx v9, v8, a6 -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: vand.vx v10, v8, a5 -; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vand.vx v11, v8, a5 -; CHECK-NEXT: li a5, 128 -; CHECK-NEXT: vand.vx v12, v8, a5 -; CHECK-NEXT: li a5, 256 -; CHECK-NEXT: vand.vx v13, v8, a5 -; CHECK-NEXT: li a5, 512 -; CHECK-NEXT: vand.vx v14, v8, a5 -; CHECK-NEXT: li a5, 1024 -; CHECK-NEXT: vand.vx v15, v8, a5 -; CHECK-NEXT: li a5, 1 -; CHECK-NEXT: slli a5, a5, 11 -; CHECK-NEXT: vand.vx v16, v8, a5 -; CHECK-NEXT: lui a5, 1 -; CHECK-NEXT: vand.vx v17, v8, a5 -; CHECK-NEXT: lui a5, 2 -; CHECK-NEXT: vand.vx v18, v8, a5 -; CHECK-NEXT: lui a5, 4 -; CHECK-NEXT: vand.vx v19, v8, a5 -; CHECK-NEXT: lui a5, 8 -; CHECK-NEXT: vand.vx v20, v8, a5 -; CHECK-NEXT: lui a5, 32 -; CHECK-NEXT: vand.vx v21, v8, a4 -; CHECK-NEXT: lui a4, 64 -; CHECK-NEXT: vand.vx v22, v8, a5 -; CHECK-NEXT: lui a5, 128 -; CHECK-NEXT: vand.vx v23, v8, a4 -; CHECK-NEXT: lui a4, 256 -; CHECK-NEXT: vand.vx v24, v8, a5 -; CHECK-NEXT: lui a5, 512 -; CHECK-NEXT: vand.vx v25, v8, a4 -; CHECK-NEXT: lui a4, 1024 -; CHECK-NEXT: vand.vx v26, v8, a5 -; CHECK-NEXT: lui a5, 2048 -; CHECK-NEXT: vand.vx v27, v8, a4 -; CHECK-NEXT: lui a4, 4096 -; CHECK-NEXT: vand.vx v28, v8, a5 -; CHECK-NEXT: lui a5, 8192 -; CHECK-NEXT: vand.vx v29, v8, a4 -; CHECK-NEXT: lui a4, 16384 -; CHECK-NEXT: vand.vx v30, v8, a5 -; CHECK-NEXT: lui a5, 32768 -; CHECK-NEXT: vand.vx v31, v8, a4 -; CHECK-NEXT: lui a4, 65536 -; CHECK-NEXT: vand.vx v7, v8, a5 -; CHECK-NEXT: lui a5, 131072 -; CHECK-NEXT: vand.vx v6, v8, a4 -; CHECK-NEXT: lui a4, 262144 -; CHECK-NEXT: vand.vx v5, v8, a5 -; CHECK-NEXT: lui a5, 524288 -; CHECK-NEXT: 
vand.vi v4, v8, 2 -; CHECK-NEXT: vand.vi v3, v8, 1 -; CHECK-NEXT: vand.vi v2, v8, 4 -; CHECK-NEXT: vand.vi v1, v8, 8 -; CHECK-NEXT: vand.vx v0, v8, a4 -; CHECK-NEXT: vmul.vv v4, v8, v4 -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs1r.v v4, (a4) # vscale x 8-byte Folded Spill -; CHECK-NEXT: vmul.vv v3, v8, v3 -; CHECK-NEXT: vmul.vv v2, v8, v2 -; CHECK-NEXT: vmul.vv v1, v8, v1 -; CHECK-NEXT: vmul.vv v9, v8, v9 -; CHECK-NEXT: vmul.vv v10, v8, v10 -; CHECK-NEXT: vmul.vv v11, v8, v11 -; CHECK-NEXT: vmul.vv v12, v8, v12 -; CHECK-NEXT: vmul.vv v13, v8, v13 -; CHECK-NEXT: vmul.vv v14, v8, v14 -; CHECK-NEXT: vmul.vv v15, v8, v15 -; CHECK-NEXT: vmul.vv v16, v8, v16 -; CHECK-NEXT: vmul.vv v17, v8, v17 -; CHECK-NEXT: vmul.vv v18, v8, v18 -; CHECK-NEXT: vmul.vv v19, v8, v19 -; CHECK-NEXT: vmul.vv v20, v8, v20 -; CHECK-NEXT: vmul.vv v21, v8, v21 -; CHECK-NEXT: vmul.vv v22, v8, v22 -; CHECK-NEXT: vmul.vv v23, v8, v23 -; CHECK-NEXT: vmul.vv v24, v8, v24 -; CHECK-NEXT: vmul.vv v25, v8, v25 -; CHECK-NEXT: vmul.vv v26, v8, v26 -; CHECK-NEXT: vmul.vv v27, v8, v27 -; CHECK-NEXT: vmul.vv v28, v8, v28 -; CHECK-NEXT: vmul.vv v29, v8, v29 -; CHECK-NEXT: vmul.vv v30, v8, v30 -; CHECK-NEXT: vmul.vv v31, v8, v31 -; CHECK-NEXT: vmul.vv v7, v8, v7 -; CHECK-NEXT: vmul.vv v6, v8, v6 -; CHECK-NEXT: vmul.vv v5, v8, v5 -; CHECK-NEXT: vmul.vv v0, v8, v0 -; CHECK-NEXT: vand.vx v4, v8, a5 -; CHECK-NEXT: vmul.vv v8, v8, v4 -; CHECK-NEXT: vl1r.v v4, (a4) # vscale x 8-byte Folded Reload -; CHECK-NEXT: vxor.vv v4, v3, v4 -; CHECK-NEXT: vxor.vv v4, v4, v2 -; CHECK-NEXT: vxor.vv v4, v4, v1 -; CHECK-NEXT: vxor.vv v9, v4, v9 -; CHECK-NEXT: vxor.vv v9, v9, v10 -; CHECK-NEXT: vxor.vv v9, v9, v11 -; CHECK-NEXT: vxor.vv v9, v9, v12 -; CHECK-NEXT: vxor.vv v9, v9, v13 -; CHECK-NEXT: vxor.vv v9, v9, v14 -; CHECK-NEXT: vxor.vv v9, v9, v15 -; CHECK-NEXT: vxor.vv v9, v9, v16 -; CHECK-NEXT: vxor.vv v9, v9, v17 -; CHECK-NEXT: vxor.vv v9, v9, v18 -; CHECK-NEXT: vxor.vv v9, v9, v19 -; CHECK-NEXT: vxor.vv v9, v9, v20 -; CHECK-NEXT: vxor.vv v9, v9, v21 -; CHECK-NEXT: vxor.vv v9, v9, v22 -; CHECK-NEXT: vxor.vv v9, v9, v23 -; CHECK-NEXT: vxor.vv v9, v9, v24 -; CHECK-NEXT: vxor.vv v9, v9, v25 -; CHECK-NEXT: vxor.vv v9, v9, v26 -; CHECK-NEXT: vxor.vv v9, v9, v27 -; CHECK-NEXT: vxor.vv v9, v9, v28 -; CHECK-NEXT: vxor.vv v9, v9, v29 -; CHECK-NEXT: vxor.vv v9, v9, v30 -; CHECK-NEXT: vxor.vv v9, v9, v31 -; CHECK-NEXT: vxor.vv v9, v9, v7 -; CHECK-NEXT: vxor.vv v9, v9, v6 -; CHECK-NEXT: vxor.vv v9, v9, v5 -; CHECK-NEXT: vxor.vv v9, v9, v0 -; CHECK-NEXT: vxor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 8 -; CHECK-NEXT: vsrl.vi v10, v8, 24 -; CHECK-NEXT: vand.vx v9, v9, a3 -; CHECK-NEXT: vor.vv v9, v9, v10 -; CHECK-NEXT: vsll.vi v10, v8, 24 -; CHECK-NEXT: vand.vx v8, v8, a3 -; CHECK-NEXT: vsll.vi v8, v8, 8 -; CHECK-NEXT: vor.vv v8, v10, v8 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret - %a = call <1 x i32> @llvm.clmulr.v1i32(<1 x i32> %x, <1 x i32> %y) - ret <1 x i32> %a 
-} - -define <2 x i32> @clmulr_v2i32(<2 x i32> %x, <2 x i32> %y) nounwind { -; CHECK-LABEL: clmulr_v2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 8 -; CHECK-NEXT: lui a4, 16 -; CHECK-NEXT: vsrl.vi v10, v8, 24 -; CHECK-NEXT: vsll.vi v11, v8, 24 -; CHECK-NEXT: lui a0, 61681 -; CHECK-NEXT: lui a1, 209715 -; CHECK-NEXT: lui a5, 349525 -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: addi a3, a4, -256 -; CHECK-NEXT: addi a2, a0, -241 -; CHECK-NEXT: addi a1, a1, 819 -; CHECK-NEXT: addi a0, a5, 1365 -; CHECK-NEXT: vand.vx v9, v9, a3 -; CHECK-NEXT: vand.vx v8, v8, a3 -; CHECK-NEXT: vor.vv v9, v9, v10 -; CHECK-NEXT: vsll.vi v8, v8, 8 -; CHECK-NEXT: vor.vv v8, v11, v8 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vand.vx v9, v8, a6 -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: vand.vx v10, v8, a5 -; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vand.vx v11, v8, a5 -; CHECK-NEXT: li a5, 128 -; CHECK-NEXT: vand.vx v12, v8, a5 -; CHECK-NEXT: li a5, 256 -; CHECK-NEXT: vand.vx v13, v8, a5 -; CHECK-NEXT: li a5, 512 -; CHECK-NEXT: vand.vx v14, v8, a5 -; CHECK-NEXT: li a5, 1024 -; CHECK-NEXT: vand.vx v15, v8, a5 -; CHECK-NEXT: li a5, 1 -; CHECK-NEXT: slli a5, a5, 11 -; CHECK-NEXT: vand.vx v16, v8, a5 -; CHECK-NEXT: lui a5, 1 -; CHECK-NEXT: vand.vx v17, v8, a5 -; CHECK-NEXT: lui a5, 2 -; CHECK-NEXT: vand.vx v18, v8, a5 -; CHECK-NEXT: lui a5, 4 -; CHECK-NEXT: vand.vx v19, v8, a5 -; CHECK-NEXT: lui a5, 8 -; CHECK-NEXT: vand.vx v20, v8, a5 -; CHECK-NEXT: lui a5, 32 -; CHECK-NEXT: vand.vx v21, v8, a4 -; CHECK-NEXT: lui a4, 64 -; CHECK-NEXT: vand.vx v22, v8, a5 -; CHECK-NEXT: lui a5, 128 -; CHECK-NEXT: vand.vx v23, v8, a4 -; CHECK-NEXT: lui a4, 256 -; CHECK-NEXT: vand.vx v24, v8, a5 -; CHECK-NEXT: lui a5, 512 -; CHECK-NEXT: vand.vx v25, v8, a4 -; CHECK-NEXT: lui a4, 1024 -; CHECK-NEXT: vand.vx v26, v8, a5 -; CHECK-NEXT: lui a5, 2048 -; CHECK-NEXT: vand.vx v27, v8, a4 -; CHECK-NEXT: lui a4, 4096 -; CHECK-NEXT: vand.vx v28, v8, a5 -; CHECK-NEXT: lui a5, 8192 -; CHECK-NEXT: vand.vx v29, v8, a4 -; CHECK-NEXT: lui a4, 16384 -; CHECK-NEXT: vand.vx v30, v8, a5 -; CHECK-NEXT: lui a5, 32768 -; CHECK-NEXT: vand.vx v31, v8, a4 -; CHECK-NEXT: lui a4, 65536 -; CHECK-NEXT: vand.vx v7, v8, a5 -; CHECK-NEXT: lui a5, 131072 -; CHECK-NEXT: vand.vx v6, v8, a4 -; CHECK-NEXT: lui a4, 262144 -; CHECK-NEXT: vand.vx v5, v8, a5 -; CHECK-NEXT: lui a5, 524288 -; CHECK-NEXT: vand.vi v4, v8, 2 -; CHECK-NEXT: vand.vi v3, v8, 1 -; CHECK-NEXT: vand.vi v2, v8, 4 -; CHECK-NEXT: vand.vi v1, v8, 8 -; CHECK-NEXT: vand.vx v0, v8, a4 -; CHECK-NEXT: vmul.vv v4, v8, v4 -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs1r.v v4, (a4) # vscale x 8-byte Folded Spill -; CHECK-NEXT: vmul.vv v3, v8, v3 -; CHECK-NEXT: vmul.vv v2, v8, v2 -; CHECK-NEXT: vmul.vv v1, v8, v1 -; CHECK-NEXT: vmul.vv v9, v8, v9 -; CHECK-NEXT: vmul.vv v10, v8, v10 -; CHECK-NEXT: vmul.vv v11, v8, v11 -; CHECK-NEXT: vmul.vv v12, v8, v12 -; CHECK-NEXT: vmul.vv v13, v8, 
v13 -; CHECK-NEXT: vmul.vv v14, v8, v14 -; CHECK-NEXT: vmul.vv v15, v8, v15 -; CHECK-NEXT: vmul.vv v16, v8, v16 -; CHECK-NEXT: vmul.vv v17, v8, v17 -; CHECK-NEXT: vmul.vv v18, v8, v18 -; CHECK-NEXT: vmul.vv v19, v8, v19 -; CHECK-NEXT: vmul.vv v20, v8, v20 -; CHECK-NEXT: vmul.vv v21, v8, v21 -; CHECK-NEXT: vmul.vv v22, v8, v22 -; CHECK-NEXT: vmul.vv v23, v8, v23 -; CHECK-NEXT: vmul.vv v24, v8, v24 -; CHECK-NEXT: vmul.vv v25, v8, v25 -; CHECK-NEXT: vmul.vv v26, v8, v26 -; CHECK-NEXT: vmul.vv v27, v8, v27 -; CHECK-NEXT: vmul.vv v28, v8, v28 -; CHECK-NEXT: vmul.vv v29, v8, v29 -; CHECK-NEXT: vmul.vv v30, v8, v30 -; CHECK-NEXT: vmul.vv v31, v8, v31 -; CHECK-NEXT: vmul.vv v7, v8, v7 -; CHECK-NEXT: vmul.vv v6, v8, v6 -; CHECK-NEXT: vmul.vv v5, v8, v5 -; CHECK-NEXT: vmul.vv v0, v8, v0 -; CHECK-NEXT: vand.vx v4, v8, a5 -; CHECK-NEXT: vmul.vv v8, v8, v4 -; CHECK-NEXT: vl1r.v v4, (a4) # vscale x 8-byte Folded Reload -; CHECK-NEXT: vxor.vv v4, v3, v4 -; CHECK-NEXT: vxor.vv v4, v4, v2 -; CHECK-NEXT: vxor.vv v4, v4, v1 -; CHECK-NEXT: vxor.vv v9, v4, v9 -; CHECK-NEXT: vxor.vv v9, v9, v10 -; CHECK-NEXT: vxor.vv v9, v9, v11 -; CHECK-NEXT: vxor.vv v9, v9, v12 -; CHECK-NEXT: vxor.vv v9, v9, v13 -; CHECK-NEXT: vxor.vv v9, v9, v14 -; CHECK-NEXT: vxor.vv v9, v9, v15 -; CHECK-NEXT: vxor.vv v9, v9, v16 -; CHECK-NEXT: vxor.vv v9, v9, v17 -; CHECK-NEXT: vxor.vv v9, v9, v18 -; CHECK-NEXT: vxor.vv v9, v9, v19 -; CHECK-NEXT: vxor.vv v9, v9, v20 -; CHECK-NEXT: vxor.vv v9, v9, v21 -; CHECK-NEXT: vxor.vv v9, v9, v22 -; CHECK-NEXT: vxor.vv v9, v9, v23 -; CHECK-NEXT: vxor.vv v9, v9, v24 -; CHECK-NEXT: vxor.vv v9, v9, v25 -; CHECK-NEXT: vxor.vv v9, v9, v26 -; CHECK-NEXT: vxor.vv v9, v9, v27 -; CHECK-NEXT: vxor.vv v9, v9, v28 -; CHECK-NEXT: vxor.vv v9, v9, v29 -; CHECK-NEXT: vxor.vv v9, v9, v30 -; CHECK-NEXT: vxor.vv v9, v9, v31 -; CHECK-NEXT: vxor.vv v9, v9, v7 -; CHECK-NEXT: vxor.vv v9, v9, v6 -; CHECK-NEXT: vxor.vv v9, v9, v5 -; CHECK-NEXT: vxor.vv v9, v9, v0 -; CHECK-NEXT: vxor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 8 -; CHECK-NEXT: vsrl.vi v10, v8, 24 -; CHECK-NEXT: vand.vx v9, v9, a3 -; CHECK-NEXT: vor.vv v9, v9, v10 -; CHECK-NEXT: vsll.vi v10, v8, 24 -; CHECK-NEXT: vand.vx v8, v8, a3 -; CHECK-NEXT: vsll.vi v8, v8, 8 -; CHECK-NEXT: vor.vv v8, v10, v8 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret - %a = call <2 x i32> @llvm.clmulr.v2i32(<2 x i32> %x, <2 x i32> %y) - ret <2 x i32> %a -} - -define <4 x i32> @clmulr_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: clmulr_v4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vsrl.vi v9, v8, 8 -; CHECK-NEXT: lui a4, 16 -; CHECK-NEXT: vsrl.vi v10, v8, 24 -; CHECK-NEXT: vsll.vi v11, v8, 24 -; CHECK-NEXT: lui a0, 61681 -; CHECK-NEXT: lui a1, 209715 -; CHECK-NEXT: lui a5, 349525 -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: addi a3, a4, -256 -; 
CHECK-NEXT: addi a2, a0, -241 -; CHECK-NEXT: addi a1, a1, 819 -; CHECK-NEXT: addi a0, a5, 1365 -; CHECK-NEXT: vand.vx v9, v9, a3 -; CHECK-NEXT: vand.vx v8, v8, a3 -; CHECK-NEXT: vor.vv v9, v9, v10 -; CHECK-NEXT: vsll.vi v8, v8, 8 -; CHECK-NEXT: vor.vv v8, v11, v8 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vand.vx v9, v8, a6 -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: vand.vx v10, v8, a5 -; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vand.vx v11, v8, a5 -; CHECK-NEXT: li a5, 128 -; CHECK-NEXT: vand.vx v12, v8, a5 -; CHECK-NEXT: li a5, 256 -; CHECK-NEXT: vand.vx v13, v8, a5 -; CHECK-NEXT: li a5, 512 -; CHECK-NEXT: vand.vx v14, v8, a5 -; CHECK-NEXT: li a5, 1024 -; CHECK-NEXT: vand.vx v15, v8, a5 -; CHECK-NEXT: li a5, 1 -; CHECK-NEXT: slli a5, a5, 11 -; CHECK-NEXT: vand.vx v16, v8, a5 -; CHECK-NEXT: lui a5, 1 -; CHECK-NEXT: vand.vx v17, v8, a5 -; CHECK-NEXT: lui a5, 2 -; CHECK-NEXT: vand.vx v18, v8, a5 -; CHECK-NEXT: lui a5, 4 -; CHECK-NEXT: vand.vx v19, v8, a5 -; CHECK-NEXT: lui a5, 8 -; CHECK-NEXT: vand.vx v20, v8, a5 -; CHECK-NEXT: lui a5, 32 -; CHECK-NEXT: vand.vx v21, v8, a4 -; CHECK-NEXT: lui a4, 64 -; CHECK-NEXT: vand.vx v22, v8, a5 -; CHECK-NEXT: lui a5, 128 -; CHECK-NEXT: vand.vx v23, v8, a4 -; CHECK-NEXT: lui a4, 256 -; CHECK-NEXT: vand.vx v24, v8, a5 -; CHECK-NEXT: lui a5, 512 -; CHECK-NEXT: vand.vx v25, v8, a4 -; CHECK-NEXT: lui a4, 1024 -; CHECK-NEXT: vand.vx v26, v8, a5 -; CHECK-NEXT: lui a5, 2048 -; CHECK-NEXT: vand.vx v27, v8, a4 -; CHECK-NEXT: lui a4, 4096 -; CHECK-NEXT: vand.vx v28, v8, a5 -; CHECK-NEXT: lui a5, 8192 -; CHECK-NEXT: vand.vx v29, v8, a4 -; CHECK-NEXT: lui a4, 16384 -; CHECK-NEXT: vand.vx v30, v8, a5 -; CHECK-NEXT: lui a5, 32768 -; CHECK-NEXT: vand.vx v31, v8, a4 -; CHECK-NEXT: lui a4, 65536 -; CHECK-NEXT: vand.vx v7, v8, a5 -; CHECK-NEXT: lui a5, 131072 -; CHECK-NEXT: vand.vx v6, v8, a4 -; CHECK-NEXT: lui a4, 262144 -; CHECK-NEXT: vand.vx v5, v8, a5 -; CHECK-NEXT: lui a5, 524288 -; CHECK-NEXT: vand.vi v4, v8, 2 -; CHECK-NEXT: vand.vi v3, v8, 1 -; CHECK-NEXT: vand.vi v2, v8, 4 -; CHECK-NEXT: vand.vi v1, v8, 8 -; CHECK-NEXT: vand.vx v0, v8, a4 -; CHECK-NEXT: vmul.vv v4, v8, v4 -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs1r.v v4, (a4) # vscale x 8-byte Folded Spill -; CHECK-NEXT: vmul.vv v3, v8, v3 -; CHECK-NEXT: vmul.vv v2, v8, v2 -; CHECK-NEXT: vmul.vv v1, v8, v1 -; CHECK-NEXT: vmul.vv v9, v8, v9 -; CHECK-NEXT: vmul.vv v10, v8, v10 -; CHECK-NEXT: vmul.vv v11, v8, v11 -; CHECK-NEXT: vmul.vv v12, v8, v12 -; CHECK-NEXT: vmul.vv v13, v8, v13 -; CHECK-NEXT: vmul.vv v14, v8, v14 -; CHECK-NEXT: vmul.vv v15, v8, v15 -; CHECK-NEXT: vmul.vv v16, v8, v16 -; CHECK-NEXT: vmul.vv v17, v8, v17 -; CHECK-NEXT: vmul.vv v18, v8, v18 -; CHECK-NEXT: vmul.vv v19, v8, v19 -; CHECK-NEXT: vmul.vv v20, v8, v20 -; CHECK-NEXT: vmul.vv v21, v8, v21 -; CHECK-NEXT: vmul.vv v22, v8, v22 -; CHECK-NEXT: vmul.vv v23, v8, v23 -; CHECK-NEXT: vmul.vv v24, v8, v24 -; CHECK-NEXT: vmul.vv v25, v8, v25 -; CHECK-NEXT: vmul.vv v26, v8, v26 -; CHECK-NEXT: vmul.vv v27, v8, v27 -; CHECK-NEXT: vmul.vv v28, v8, v28 -; 
CHECK-NEXT: vmul.vv v29, v8, v29 -; CHECK-NEXT: vmul.vv v30, v8, v30 -; CHECK-NEXT: vmul.vv v31, v8, v31 -; CHECK-NEXT: vmul.vv v7, v8, v7 -; CHECK-NEXT: vmul.vv v6, v8, v6 -; CHECK-NEXT: vmul.vv v5, v8, v5 -; CHECK-NEXT: vmul.vv v0, v8, v0 -; CHECK-NEXT: vand.vx v4, v8, a5 -; CHECK-NEXT: vmul.vv v8, v8, v4 -; CHECK-NEXT: vl1r.v v4, (a4) # vscale x 8-byte Folded Reload -; CHECK-NEXT: vxor.vv v4, v3, v4 -; CHECK-NEXT: vxor.vv v4, v4, v2 -; CHECK-NEXT: vxor.vv v4, v4, v1 -; CHECK-NEXT: vxor.vv v9, v4, v9 -; CHECK-NEXT: vxor.vv v9, v9, v10 -; CHECK-NEXT: vxor.vv v9, v9, v11 -; CHECK-NEXT: vxor.vv v9, v9, v12 -; CHECK-NEXT: vxor.vv v9, v9, v13 -; CHECK-NEXT: vxor.vv v9, v9, v14 -; CHECK-NEXT: vxor.vv v9, v9, v15 -; CHECK-NEXT: vxor.vv v9, v9, v16 -; CHECK-NEXT: vxor.vv v9, v9, v17 -; CHECK-NEXT: vxor.vv v9, v9, v18 -; CHECK-NEXT: vxor.vv v9, v9, v19 -; CHECK-NEXT: vxor.vv v9, v9, v20 -; CHECK-NEXT: vxor.vv v9, v9, v21 -; CHECK-NEXT: vxor.vv v9, v9, v22 -; CHECK-NEXT: vxor.vv v9, v9, v23 -; CHECK-NEXT: vxor.vv v9, v9, v24 -; CHECK-NEXT: vxor.vv v9, v9, v25 -; CHECK-NEXT: vxor.vv v9, v9, v26 -; CHECK-NEXT: vxor.vv v9, v9, v27 -; CHECK-NEXT: vxor.vv v9, v9, v28 -; CHECK-NEXT: vxor.vv v9, v9, v29 -; CHECK-NEXT: vxor.vv v9, v9, v30 -; CHECK-NEXT: vxor.vv v9, v9, v31 -; CHECK-NEXT: vxor.vv v9, v9, v7 -; CHECK-NEXT: vxor.vv v9, v9, v6 -; CHECK-NEXT: vxor.vv v9, v9, v5 -; CHECK-NEXT: vxor.vv v9, v9, v0 -; CHECK-NEXT: vxor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 8 -; CHECK-NEXT: vsrl.vi v10, v8, 24 -; CHECK-NEXT: vand.vx v9, v9, a3 -; CHECK-NEXT: vor.vv v9, v9, v10 -; CHECK-NEXT: vsll.vi v10, v8, 24 -; CHECK-NEXT: vand.vx v8, v8, a3 -; CHECK-NEXT: vsll.vi v8, v8, 8 -; CHECK-NEXT: vor.vv v8, v10, v8 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vsrl.vi v9, v8, 4 -; CHECK-NEXT: vand.vx v8, v8, a2 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vsll.vi v8, v8, 4 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vsll.vi v8, v8, 2 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: vsrl.vi v9, v8, 1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vor.vv v8, v9, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret - %a = call <4 x i32> @llvm.clmulr.v4i32(<4 x i32> %x, <4 x i32> %y) - ret <4 x i32> %a -} - -define <8 x i32> @clmulr_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; RV32-LABEL: clmulr_v8i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: sw s0, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 52(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 48(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vsll.vi v14, v8, 24 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: lui s6, 349525 -; RV32-NEXT: li t2, 16 -; RV32-NEXT: li t6, 32 -; RV32-NEXT: li s3, 64 -; RV32-NEXT: li s5, 128 -; RV32-NEXT: li s4, 256 -; RV32-NEXT: li s2, 512 -; RV32-NEXT: li s1, 
1024 -; RV32-NEXT: li s0, 1 -; RV32-NEXT: lui t5, 1 -; RV32-NEXT: lui t4, 2 -; RV32-NEXT: lui t3, 4 -; RV32-NEXT: lui a5, 8 -; RV32-NEXT: lui a6, 32 -; RV32-NEXT: lui a7, 64 -; RV32-NEXT: lui t0, 128 -; RV32-NEXT: lui t1, 256 -; RV32-NEXT: addi a4, a0, -256 -; RV32-NEXT: addi a3, a1, -241 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: addi a1, s6, 1365 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v14, v8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v10, v10, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v10, v10, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vand.vx v10, v8, t2 -; RV32-NEXT: lui t2, 512 -; RV32-NEXT: vand.vx v12, v8, t6 -; RV32-NEXT: lui t6, 1024 -; RV32-NEXT: vand.vx v14, v8, s3 -; RV32-NEXT: lui s3, 2048 -; RV32-NEXT: vand.vx v16, v8, s5 -; RV32-NEXT: lui s5, 4096 -; RV32-NEXT: vand.vx v26, v8, s4 -; RV32-NEXT: lui s4, 8192 -; RV32-NEXT: vand.vx v28, v8, s2 -; RV32-NEXT: lui s2, 16384 -; RV32-NEXT: vand.vx v18, v8, s1 -; RV32-NEXT: lui s1, 32768 -; RV32-NEXT: slli s0, s0, 11 -; RV32-NEXT: vand.vx v20, v8, s0 -; RV32-NEXT: lui s0, 65536 -; RV32-NEXT: vand.vx v22, v8, t5 -; RV32-NEXT: lui t5, 131072 -; RV32-NEXT: vand.vx v24, v8, t4 -; RV32-NEXT: lui t4, 262144 -; RV32-NEXT: vand.vx v30, v8, t3 -; RV32-NEXT: lui t3, 524288 -; RV32-NEXT: vand.vi v6, v8, 2 -; RV32-NEXT: vand.vi v4, v8, 1 -; RV32-NEXT: vand.vi v2, v8, 4 -; RV32-NEXT: vand.vi v0, v8, 8 -; RV32-NEXT: vmul.vv v6, v8, v6 -; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v6, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v6, v8, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v6, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte 
Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v26 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v18 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v22 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv s6, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, s6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v30 -; RV32-NEXT: csrr s6, vlenb -; RV32-NEXT: slli s6, s6, 1 -; RV32-NEXT: mv a0, s6 -; RV32-NEXT: slli s6, s6, 1 -; RV32-NEXT: add s6, s6, a0 -; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add s6, sp, s6 -; RV32-NEXT: addi s6, s6, 32 -; RV32-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 32 -; RV32-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a6 -; RV32-NEXT: vmul.vv v10, v8, v10 -; 
RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a7 -; RV32-NEXT: vmul.vv v6, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t0 -; RV32-NEXT: vmul.vv v30, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t1 -; RV32-NEXT: vmul.vv v28, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t2 -; RV32-NEXT: vmul.vv v26, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t6 -; RV32-NEXT: vmul.vv v24, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s3 -; RV32-NEXT: vmul.vv v22, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s5 -; RV32-NEXT: vmul.vv v20, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s4 -; RV32-NEXT: vmul.vv v18, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s2 -; RV32-NEXT: vmul.vv v16, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s1 -; RV32-NEXT: vmul.vv v14, v8, v10 -; RV32-NEXT: vand.vx v10, v8, s0 -; RV32-NEXT: vmul.vv v12, v8, v10 -; RV32-NEXT: vand.vx v10, v8, t5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: vand.vx v0, v8, t4 -; RV32-NEXT: vmul.vv v0, v8, v0 -; RV32-NEXT: vand.vx v2, v8, t3 -; RV32-NEXT: vmul.vv v8, v8, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v4, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; 
RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v2, v2, v4 -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v4, v2, v4 -; RV32-NEXT: vxor.vv v6, v4, v6 -; RV32-NEXT: vxor.vv v30, v6, v30 -; RV32-NEXT: vxor.vv v28, v30, v28 -; RV32-NEXT: vxor.vv v26, v28, v26 -; RV32-NEXT: vxor.vv v24, v26, v24 -; RV32-NEXT: vxor.vv v22, v24, v22 -; RV32-NEXT: vxor.vv v20, v22, v20 -; RV32-NEXT: vxor.vv v18, v20, v18 -; RV32-NEXT: vxor.vv v16, v18, v16 -; RV32-NEXT: vxor.vv v14, v16, v14 -; RV32-NEXT: vxor.vv v12, v14, v12 -; RV32-NEXT: vxor.vv v10, v12, v10 -; RV32-NEXT: vxor.vv v10, v10, v0 -; RV32-NEXT: vxor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsll.vi v12, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: 
vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v10, v10, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v10, v10, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw s0, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 52(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 48(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_v8i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -96 -; RV64-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vsll.vi v14, v8, 24 -; RV64-NEXT: lui a1, 61681 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui s6, 349525 -; RV64-NEXT: li t2, 16 -; RV64-NEXT: li t6, 32 -; RV64-NEXT: li s3, 64 -; RV64-NEXT: li s5, 128 -; RV64-NEXT: li s4, 256 -; RV64-NEXT: li s2, 512 -; RV64-NEXT: li s1, 1024 -; RV64-NEXT: li s0, 1 -; RV64-NEXT: lui t5, 1 -; RV64-NEXT: lui t4, 2 -; RV64-NEXT: lui t3, 4 -; RV64-NEXT: lui a5, 8 -; RV64-NEXT: lui a6, 32 -; RV64-NEXT: lui a7, 64 -; RV64-NEXT: lui t0, 128 -; RV64-NEXT: lui t1, 256 -; RV64-NEXT: addi a4, a0, -256 -; RV64-NEXT: addi a3, a1, -241 -; RV64-NEXT: addi a2, a2, 819 -; RV64-NEXT: addi a1, s6, 1365 -; RV64-NEXT: vand.vx v10, v10, a4 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v10, v10, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v10, v10, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v10, v10, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vand.vx v10, v8, t2 -; RV64-NEXT: lui t2, 512 -; RV64-NEXT: vand.vx v12, v8, t6 -; RV64-NEXT: lui t6, 1024 -; RV64-NEXT: vand.vx v14, v8, s3 -; RV64-NEXT: lui s3, 2048 -; RV64-NEXT: vand.vx v16, v8, s5 -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: vand.vx v26, v8, s4 -; RV64-NEXT: lui s4, 8192 -; RV64-NEXT: vand.vx v28, v8, s2 -; RV64-NEXT: lui s2, 16384 -; RV64-NEXT: vand.vx v18, v8, s1 -; RV64-NEXT: lui s1, 32768 -; RV64-NEXT: slli s0, s0, 11 -; RV64-NEXT: vand.vx v20, v8, s0 -; RV64-NEXT: lui s0, 65536 -; RV64-NEXT: vand.vx v22, v8, t5 -; RV64-NEXT: lui t5, 131072 -; RV64-NEXT: vand.vx v24, v8, 
t4 -; RV64-NEXT: lui t4, 262144 -; RV64-NEXT: vand.vx v30, v8, t3 -; RV64-NEXT: lui t3, 524288 -; RV64-NEXT: vand.vi v6, v8, 2 -; RV64-NEXT: vand.vi v4, v8, 1 -; RV64-NEXT: vand.vi v2, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v6, v8, v6 -; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v14 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v26 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v18 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: slli a0, 
a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v20 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v22 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv s6, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, s6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v30 -; RV64-NEXT: csrr s6, vlenb -; RV64-NEXT: slli s6, s6, 1 -; RV64-NEXT: mv a0, s6 -; RV64-NEXT: slli s6, s6, 1 -; RV64-NEXT: add s6, s6, a0 -; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s6, sp, s6 -; RV64-NEXT: addi s6, s6, 32 -; RV64-NEXT: vs2r.v v10, (s6) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, a5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, a0 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, a6 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, a7 -; RV64-NEXT: vmul.vv v6, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t0 -; RV64-NEXT: vmul.vv v30, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t1 -; RV64-NEXT: vmul.vv v28, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t2 -; RV64-NEXT: vmul.vv v26, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t6 -; RV64-NEXT: vmul.vv v24, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s3 -; RV64-NEXT: vmul.vv v22, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v20, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s4 -; RV64-NEXT: vmul.vv v18, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s2 -; RV64-NEXT: vmul.vv v16, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s1 -; RV64-NEXT: vmul.vv v14, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s0 -; RV64-NEXT: vmul.vv v12, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: vand.vx v0, v8, t4 -; RV64-NEXT: vmul.vv v0, v8, v0 -; RV64-NEXT: vand.vx v2, v8, t3 -; RV64-NEXT: vmul.vv v8, v8, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v4, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; 
RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; 
RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v2, v4 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v4, v2, v4 -; RV64-NEXT: vxor.vv v6, v4, v6 -; RV64-NEXT: vxor.vv v30, v6, v30 -; RV64-NEXT: vxor.vv v28, v30, v28 -; RV64-NEXT: vxor.vv v26, v28, v26 -; RV64-NEXT: vxor.vv v24, v26, v24 -; RV64-NEXT: vxor.vv v22, v24, v22 -; RV64-NEXT: vxor.vv v20, v22, v20 -; RV64-NEXT: vxor.vv v18, v20, v18 -; RV64-NEXT: vxor.vv v16, v18, v16 -; RV64-NEXT: vxor.vv v14, v16, v14 -; RV64-NEXT: vxor.vv v12, v14, v12 -; RV64-NEXT: vxor.vv v10, v12, v10 -; RV64-NEXT: vxor.vv v10, v10, v0 -; RV64-NEXT: vxor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: vsrl.vi v12, v8, 24 -; RV64-NEXT: vand.vx v10, v10, a4 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vsll.vi v12, v8, 24 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v10, v10, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v10, v10, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v10, v10, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 96 -; RV64-NEXT: ret - %a = call <8 x i32> @llvm.clmulr.v8i32(<8 x i32> %x, <8 x i32> %x) - ret <8 x i32> %a -} - -define <16 x i32> @clmulr_v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; RV32-LABEL: clmulr_v16i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -80 -; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 68(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 64(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 52(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 48(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 40(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 36(sp) # 4-byte Folded Spill -; 
RV32-NEXT: sw s10, 32(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: lui a5, 16 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vsll.vi v20, v8, 24 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: lui ra, 349525 -; RV32-NEXT: li s11, 16 -; RV32-NEXT: li s10, 32 -; RV32-NEXT: li s9, 64 -; RV32-NEXT: li a7, 512 -; RV32-NEXT: li t0, 1024 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: lui t1, 1 -; RV32-NEXT: lui t2, 2 -; RV32-NEXT: lui t3, 4 -; RV32-NEXT: lui t4, 8 -; RV32-NEXT: lui t5, 32 -; RV32-NEXT: lui t6, 64 -; RV32-NEXT: lui s0, 128 -; RV32-NEXT: lui s1, 256 -; RV32-NEXT: lui s2, 512 -; RV32-NEXT: lui s3, 1024 -; RV32-NEXT: lui s4, 2048 -; RV32-NEXT: lui s5, 4096 -; RV32-NEXT: lui s6, 8192 -; RV32-NEXT: lui s7, 16384 -; RV32-NEXT: lui s8, 32768 -; RV32-NEXT: addi a4, a5, -256 -; RV32-NEXT: addi a3, a1, -241 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: addi a1, ra, 1365 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vand.vx v12, v8, s11 -; RV32-NEXT: lui s11, 65536 -; RV32-NEXT: vand.vx v16, v8, s10 -; RV32-NEXT: lui s10, 131072 -; RV32-NEXT: vand.vx v20, v8, s9 -; RV32-NEXT: lui s9, 262144 -; RV32-NEXT: slli ra, a0, 11 -; RV32-NEXT: vand.vi v24, v8, 2 -; RV32-NEXT: vand.vi v28, v8, 1 -; RV32-NEXT: vand.vi v4, v8, 4 -; RV32-NEXT: vand.vi v0, v8, 8 -; RV32-NEXT: vmul.vv v24, v8, v24 -; RV32-NEXT: sw a4, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v24, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v24, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v24, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, 
a0, 1 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a4, a4, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: li a6, 128 -; RV32-NEXT: vand.vx v12, v8, a6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: mv a6, a4 -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, a4, a6 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a6, 256 -; RV32-NEXT: vand.vx v12, v8, a6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: mv a4, a6 -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: add a6, a6, a4 -; RV32-NEXT: lw a4, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, a7 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 6 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, ra -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t2 -; RV32-NEXT: vmul.vv v12, v8, 
v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 4 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 1 -; RV32-NEXT: add a7, a7, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: slli a6, a6, 3 -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: slli a6, a6, 2 -; RV32-NEXT: add a6, a6, a7 -; RV32-NEXT: add a6, sp, a6 -; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, a5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 5 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a6, a6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: mv a6, a5 -; RV32-NEXT: slli a5, a5, 1 -; RV32-NEXT: add a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 3 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a5, vlenb -; 
RV32-NEXT: slli a5, a5, 2 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s7 -; RV32-NEXT: vmul.vv v28, v8, v12 -; RV32-NEXT: vand.vx v12, v8, s8 -; RV32-NEXT: vmul.vv v24, v8, v12 -; RV32-NEXT: vand.vx v12, v8, s11 -; RV32-NEXT: vmul.vv v20, v8, v12 -; RV32-NEXT: vand.vx v12, v8, s10 -; RV32-NEXT: vmul.vv v16, v8, v12 -; RV32-NEXT: vand.vx v12, v8, s9 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: vand.vx v0, v8, a0 -; RV32-NEXT: vmul.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v4, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: 
add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a5, a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: 
vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v0, v0, v4 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v4, v0, v4 -; RV32-NEXT: vxor.vv v28, v4, v28 -; RV32-NEXT: vxor.vv v24, v28, v24 -; RV32-NEXT: vxor.vv v20, v24, v20 -; RV32-NEXT: vxor.vv v16, v20, v16 -; RV32-NEXT: vxor.vv v12, v16, v12 -; RV32-NEXT: vxor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsll.vi v16, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vand.vx v8, v8, a2 -; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 68(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 64(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 52(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 48(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 40(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 36(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 32(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 80 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_v16i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -144 -; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 120(sp) # 8-byte 
Folded Spill -; RV64-NEXT: sd s2, 112(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 104(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: lui a5, 16 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vsll.vi v20, v8, 24 -; RV64-NEXT: lui a1, 61681 -; RV64-NEXT: lui a2, 209715 -; RV64-NEXT: lui ra, 349525 -; RV64-NEXT: li s11, 16 -; RV64-NEXT: li s10, 32 -; RV64-NEXT: li s9, 64 -; RV64-NEXT: li a7, 512 -; RV64-NEXT: li t0, 1024 -; RV64-NEXT: li a0, 1 -; RV64-NEXT: lui t1, 1 -; RV64-NEXT: lui t2, 2 -; RV64-NEXT: lui t3, 4 -; RV64-NEXT: lui t4, 8 -; RV64-NEXT: lui t5, 32 -; RV64-NEXT: lui t6, 64 -; RV64-NEXT: lui s0, 128 -; RV64-NEXT: lui s1, 256 -; RV64-NEXT: lui s2, 512 -; RV64-NEXT: lui s3, 1024 -; RV64-NEXT: lui s4, 2048 -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: lui s6, 8192 -; RV64-NEXT: lui s7, 16384 -; RV64-NEXT: lui s8, 32768 -; RV64-NEXT: addi a4, a5, -256 -; RV64-NEXT: addi a3, a1, -241 -; RV64-NEXT: addi a2, a2, 819 -; RV64-NEXT: addi a1, ra, 1365 -; RV64-NEXT: vand.vx v12, v12, a4 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v20, v8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v12, v12, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v12, v12, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v12, v12, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vand.vx v12, v8, s11 -; RV64-NEXT: lui s11, 65536 -; RV64-NEXT: vand.vx v16, v8, s10 -; RV64-NEXT: lui s10, 131072 -; RV64-NEXT: vand.vx v20, v8, s9 -; RV64-NEXT: lui s9, 262144 -; RV64-NEXT: slli ra, a0, 11 -; RV64-NEXT: vand.vi v24, v8, 2 -; RV64-NEXT: vand.vi v28, v8, 1 -; RV64-NEXT: vand.vi v4, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v24, v8, v24 -; RV64-NEXT: sd a4, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v28 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: 
slli a0, a0, 1 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v20 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a4, a4, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui a0, 524288 -; RV64-NEXT: li a6, 128 -; RV64-NEXT: vand.vx v12, v8, a6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: mv a6, a4 -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, a4, a6 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 32 -; RV64-NEXT: vs4r.v v12, (a4) # vscale x 32-byte Folded Spill -; RV64-NEXT: li a6, 256 -; RV64-NEXT: vand.vx v12, v8, a6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: mv a4, a6 -; RV64-NEXT: slli a6, a6, 4 -; RV64-NEXT: add a6, a6, a4 -; RV64-NEXT: ld a4, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a7 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 6 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t0 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, ra -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 3 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t1 
-; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 4 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 1 -; RV64-NEXT: add a7, a7, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t4 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a6, vlenb -; RV64-NEXT: slli a6, a6, 3 -; RV64-NEXT: mv a7, a6 -; RV64-NEXT: slli a6, a6, 2 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: add a6, sp, a6 -; RV64-NEXT: addi a6, a6, 32 -; RV64-NEXT: vs4r.v v12, (a6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a6, a6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s0 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s1 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv a6, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, a6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 
-; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s4 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: addi a5, sp, 32 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s7 -; RV64-NEXT: vmul.vv v28, v8, v12 -; RV64-NEXT: vand.vx v12, v8, s8 -; RV64-NEXT: vmul.vv v24, v8, v12 -; RV64-NEXT: vand.vx v12, v8, s11 -; RV64-NEXT: vmul.vv v20, v8, v12 -; RV64-NEXT: vand.vx v12, v8, s10 -; RV64-NEXT: vmul.vv v16, v8, v12 -; RV64-NEXT: vand.vx v12, v8, s9 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: vand.vx v0, v8, a0 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v4, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; 
RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a5, a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add 
a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v4, v0, v4 -; RV64-NEXT: vxor.vv v28, v4, v28 -; RV64-NEXT: vxor.vv v24, v28, v24 -; RV64-NEXT: vxor.vv v20, v24, v20 -; RV64-NEXT: vxor.vv v16, v20, v16 -; RV64-NEXT: vxor.vv v12, v16, v12 -; RV64-NEXT: vxor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vsrl.vi v16, v8, 24 -; RV64-NEXT: vand.vx v12, v12, a4 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vsll.vi v16, v8, 24 -; RV64-NEXT: vand.vx v8, v8, a4 -; RV64-NEXT: vsll.vi v8, v8, 8 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vand.vx v12, v12, a3 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vand.vx v12, v12, a2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a1 -; RV64-NEXT: vand.vx v12, v12, a1 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 112(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 104(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 64(sp) # 8-byte Folded Reload -; 
RV64-NEXT: ld s9, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 144 -; RV64-NEXT: ret - %a = call <16 x i32> @llvm.clmulr.v16i32(<16 x i32> %x, <16 x i32> %y) - ret <16 x i32> %a -} - -define <1 x i64> @clmulr_v1i64(<1 x i64> %x, <1 x i64> %y) nounwind { -; RV32-LABEL: clmulr_v1i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -352 -; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: lui s7, 1044480 -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: li s11, 1 -; RV32-NEXT: li s8, 2 -; RV32-NEXT: li s9, 4 -; RV32-NEXT: li s10, 8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: li a4, 32 -; RV32-NEXT: li a5, 64 -; RV32-NEXT: li a6, 128 -; RV32-NEXT: li ra, 256 -; RV32-NEXT: li a0, 512 -; RV32-NEXT: li a1, 1024 -; RV32-NEXT: lui a2, 1 -; RV32-NEXT: lui t0, 2 -; RV32-NEXT: lui t1, 4 -; RV32-NEXT: lui t2, 8 -; RV32-NEXT: lui t3, 16 -; RV32-NEXT: lui t4, 32 -; RV32-NEXT: lui t5, 64 -; RV32-NEXT: lui t6, 128 -; RV32-NEXT: lui s0, 256 -; RV32-NEXT: lui s1, 512 -; RV32-NEXT: lui s2, 1024 -; RV32-NEXT: lui s3, 2048 -; RV32-NEXT: lui s4, 4096 -; RV32-NEXT: lui s5, 8192 -; RV32-NEXT: lui s6, 16384 -; RV32-NEXT: sw s7, 272(sp) -; RV32-NEXT: lui s7, 32768 -; RV32-NEXT: sw zero, 276(sp) -; RV32-NEXT: sw a7, 264(sp) -; RV32-NEXT: sw zero, 268(sp) -; RV32-NEXT: sw zero, 256(sp) -; RV32-NEXT: sw s11, 260(sp) -; RV32-NEXT: sw zero, 248(sp) -; RV32-NEXT: sw s8, 252(sp) -; RV32-NEXT: lui s8, 65536 -; RV32-NEXT: sw zero, 240(sp) -; RV32-NEXT: sw s9, 244(sp) -; RV32-NEXT: lui s9, 131072 -; RV32-NEXT: sw zero, 232(sp) -; RV32-NEXT: sw s10, 236(sp) -; RV32-NEXT: lui s10, 262144 -; RV32-NEXT: sw zero, 224(sp) -; RV32-NEXT: sw a3, 228(sp) -; RV32-NEXT: sw zero, 216(sp) -; RV32-NEXT: sw a4, 220(sp) -; RV32-NEXT: sw zero, 208(sp) -; RV32-NEXT: sw a5, 212(sp) -; RV32-NEXT: sw zero, 200(sp) -; RV32-NEXT: sw a6, 204(sp) -; RV32-NEXT: sw zero, 192(sp) -; RV32-NEXT: sw ra, 196(sp) -; RV32-NEXT: sw zero, 184(sp) -; RV32-NEXT: sw a0, 188(sp) -; RV32-NEXT: sw zero, 176(sp) -; RV32-NEXT: sw a1, 180(sp) -; RV32-NEXT: slli s11, s11, 11 -; RV32-NEXT: sw zero, 168(sp) -; RV32-NEXT: sw s11, 172(sp) -; RV32-NEXT: sw zero, 160(sp) -; RV32-NEXT: sw a2, 164(sp) -; RV32-NEXT: sw zero, 152(sp) -; RV32-NEXT: sw t0, 156(sp) -; RV32-NEXT: sw zero, 144(sp) -; RV32-NEXT: sw t1, 148(sp) -; RV32-NEXT: sw zero, 136(sp) -; RV32-NEXT: sw t2, 140(sp) -; RV32-NEXT: sw zero, 128(sp) -; RV32-NEXT: sw t3, 132(sp) -; RV32-NEXT: sw zero, 120(sp) -; RV32-NEXT: sw t4, 124(sp) -; RV32-NEXT: sw zero, 112(sp) -; RV32-NEXT: sw t5, 116(sp) -; RV32-NEXT: sw zero, 104(sp) -; RV32-NEXT: sw t6, 108(sp) -; 
RV32-NEXT: sw zero, 96(sp) -; RV32-NEXT: sw s0, 100(sp) -; RV32-NEXT: sw zero, 88(sp) -; RV32-NEXT: sw s1, 92(sp) -; RV32-NEXT: sw zero, 80(sp) -; RV32-NEXT: sw s2, 84(sp) -; RV32-NEXT: sw zero, 72(sp) -; RV32-NEXT: sw s3, 76(sp) -; RV32-NEXT: sw zero, 64(sp) -; RV32-NEXT: sw s4, 68(sp) -; RV32-NEXT: sw zero, 56(sp) -; RV32-NEXT: sw s5, 60(sp) -; RV32-NEXT: sw zero, 48(sp) -; RV32-NEXT: sw s6, 52(sp) -; RV32-NEXT: sw zero, 40(sp) -; RV32-NEXT: sw s7, 44(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw s8, 36(sp) -; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw s9, 28(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw s10, 20(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: sw a7, 12(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v3, a0 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vmv.v.x v2, a0 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vmv.v.x v1, a0 -; RV32-NEXT: addi a0, sp, 272 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v0, (a0), zero -; RV32-NEXT: addi a0, sp, 264 -; RV32-NEXT: vlse64.v v13, (a0), zero -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vlse64.v v14, (a0), zero -; RV32-NEXT: addi a0, sp, 248 -; RV32-NEXT: vlse64.v v15, (a0), zero -; RV32-NEXT: addi a0, sp, 240 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: addi a0, sp, 232 -; RV32-NEXT: vlse64.v v17, (a0), zero -; RV32-NEXT: addi a0, sp, 224 -; RV32-NEXT: vlse64.v v18, (a0), zero -; RV32-NEXT: addi a0, sp, 216 -; RV32-NEXT: vlse64.v v19, (a0), zero -; RV32-NEXT: addi a0, sp, 208 -; RV32-NEXT: vlse64.v v20, (a0), zero -; RV32-NEXT: addi a0, sp, 200 -; RV32-NEXT: vlse64.v v21, (a0), zero -; RV32-NEXT: addi a0, sp, 192 -; RV32-NEXT: vlse64.v v22, (a0), zero -; RV32-NEXT: addi a0, sp, 184 -; RV32-NEXT: vlse64.v v23, (a0), zero -; RV32-NEXT: addi a0, sp, 176 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: addi a0, sp, 168 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: addi a0, sp, 152 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: addi a0, sp, 136 -; RV32-NEXT: vlse64.v v29, (a0), zero -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: vlse64.v v30, (a0), zero -; RV32-NEXT: addi a0, sp, 120 -; RV32-NEXT: vlse64.v v31, (a0), zero -; RV32-NEXT: addi a0, sp, 112 -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: addi a0, sp, 104 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: addi a0, sp, 96 -; RV32-NEXT: vlse64.v v5, (a0), zero -; RV32-NEXT: addi a0, sp, 88 -; RV32-NEXT: vlse64.v v4, (a0), zero -; RV32-NEXT: li a6, 56 -; RV32-NEXT: vsrl.vi v27, v8, 24 -; RV32-NEXT: vsrl.vx v28, v8, a6 -; RV32-NEXT: li ra, 40 -; RV32-NEXT: vsrl.vx v7, v8, ra -; RV32-NEXT: vsll.vx v6, v8, a6 -; RV32-NEXT: addi a4, t3, -256 -; RV32-NEXT: vand.vx v7, v7, a4 -; RV32-NEXT: vor.vv v28, v7, v28 -; RV32-NEXT: vand.vx v7, v8, a4 -; RV32-NEXT: vsll.vx v7, v7, ra -; RV32-NEXT: vor.vv v7, v6, v7 -; RV32-NEXT: vsrl.vi v6, v8, 8 -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v27, v27, a5 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v6, v6, 
v0 -; RV32-NEXT: vor.vv v27, v6, v27 -; RV32-NEXT: addi a3, sp, 80 -; RV32-NEXT: vlse64.v v6, (a3), zero -; RV32-NEXT: vor.vv v27, v27, v28 -; RV32-NEXT: vand.vx v28, v8, a5 -; RV32-NEXT: vsll.vi v28, v28, 24 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v28, v8 -; RV32-NEXT: addi a3, sp, 72 -; RV32-NEXT: vlse64.v v28, (a3), zero -; RV32-NEXT: vor.vv v8, v7, v8 -; RV32-NEXT: addi a3, sp, 64 -; RV32-NEXT: vlse64.v v7, (a3), zero -; RV32-NEXT: vor.vv v8, v8, v27 -; RV32-NEXT: vsrl.vi v27, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v3 -; RV32-NEXT: vand.vv v27, v27, v3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v27, v8 -; RV32-NEXT: vsrl.vi v27, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v2 -; RV32-NEXT: vand.vv v27, v27, v2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v27, v8 -; RV32-NEXT: vsrl.vi v27, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v1 -; RV32-NEXT: vand.vv v27, v27, v1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v27, v8 -; RV32-NEXT: addi a3, sp, 56 -; RV32-NEXT: vlse64.v v27, (a3), zero -; RV32-NEXT: vand.vv v13, v8, v13 -; RV32-NEXT: vand.vv v14, v8, v14 -; RV32-NEXT: vand.vv v15, v8, v15 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vv v17, v8, v17 -; RV32-NEXT: vand.vv v18, v8, v18 -; RV32-NEXT: vand.vv v19, v8, v19 -; RV32-NEXT: vand.vv v20, v8, v20 -; RV32-NEXT: vand.vv v21, v8, v21 -; RV32-NEXT: vand.vv v22, v8, v22 -; RV32-NEXT: vand.vv v23, v8, v23 -; RV32-NEXT: vand.vv v24, v8, v24 -; RV32-NEXT: vand.vv v25, v8, v25 -; RV32-NEXT: vand.vv v26, v8, v26 -; RV32-NEXT: vand.vv v3, v8, v9 -; RV32-NEXT: vand.vv v2, v8, v10 -; RV32-NEXT: vand.vv v29, v8, v29 -; RV32-NEXT: vand.vv v30, v8, v30 -; RV32-NEXT: vand.vv v31, v8, v31 -; RV32-NEXT: vand.vv v0, v8, v11 -; RV32-NEXT: vand.vv v9, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v5, v8, v5 -; RV32-NEXT: vand.vv v4, v8, v4 -; RV32-NEXT: vand.vv v6, v8, v6 -; RV32-NEXT: vand.vv v9, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: addi 
a0, sp, 40 -; RV32-NEXT: vlse64.v v9, (a3), zero -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vand.vv v11, v8, v7 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v11, v8, v27 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a2), zero -; RV32-NEXT: vlse64.v v10, (a3), zero -; RV32-NEXT: vlse64.v v11, (a1), zero -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v11 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 4 -; RV32-NEXT: 
vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 8 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 16 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 64 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 128 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 256 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 512 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 1024 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s11 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: 
csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t3 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t4 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t5 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t6 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: 
slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s3 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s4 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s5 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 2 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s6 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s7 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s8 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s9 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v1, v8, s10 -; RV32-NEXT: vmul.vv v1, v8, v1 -; RV32-NEXT: vmul.vv v9, v8, v13 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v14 -; RV32-NEXT: vmul.vv v11, v8, v15 -; RV32-NEXT: vmul.vv v12, v8, v16 -; RV32-NEXT: vmul.vv v13, v8, v17 -; RV32-NEXT: vmul.vv v14, v8, v18 -; RV32-NEXT: vmul.vv v15, v8, v19 -; RV32-NEXT: vmul.vv v16, v8, v20 -; RV32-NEXT: vmul.vv v17, v8, v21 -; RV32-NEXT: vmul.vv v18, v8, v22 -; RV32-NEXT: vmul.vv v19, v8, v23 -; RV32-NEXT: vmul.vv v20, v8, v24 -; RV32-NEXT: vmul.vv v21, v8, v25 -; RV32-NEXT: vmul.vv v22, v8, v26 -; RV32-NEXT: vmul.vv v23, v8, v3 -; RV32-NEXT: vmul.vv v24, v8, v2 -; RV32-NEXT: vmul.vv v25, v8, v29 -; RV32-NEXT: vmul.vv v26, v8, v30 -; RV32-NEXT: vmul.vv v27, v8, v31 -; RV32-NEXT: vmul.vv v28, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v29, v8, v29 -; RV32-NEXT: vmul.vv v30, v8, v5 -; RV32-NEXT: vmul.vv v31, v8, v4 -; RV32-NEXT: vmul.vv v7, v8, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v 
v6, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v6, v8, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v5, v8, v5 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v4, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v3, v8, v3 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v2, v8, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v0, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; 
RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vi v8, v8, 0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded 
Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 2 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vxor.vv v8, v8, v1 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: vxor.vv v8, v8, v11 -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vxor.vv v8, v8, v13 -; RV32-NEXT: vxor.vv v8, v8, v14 -; RV32-NEXT: vxor.vv v8, v8, v15 -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vxor.vv v8, v8, v17 -; RV32-NEXT: vxor.vv v8, v8, v18 -; RV32-NEXT: vxor.vv v8, v8, v19 -; RV32-NEXT: vxor.vv v8, v8, v20 -; RV32-NEXT: vxor.vv v8, v8, v21 -; RV32-NEXT: vxor.vv v8, v8, v22 -; RV32-NEXT: vxor.vv v8, v8, v23 -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vxor.vv v8, v8, v25 -; RV32-NEXT: vxor.vv v8, v8, v26 -; RV32-NEXT: vxor.vv v8, v8, v27 -; RV32-NEXT: vxor.vv v8, v8, v28 -; RV32-NEXT: vxor.vv v8, v8, v29 -; RV32-NEXT: vxor.vv v8, v8, v30 -; RV32-NEXT: vxor.vv v8, v8, v31 -; RV32-NEXT: vxor.vv v8, v8, v7 -; RV32-NEXT: vxor.vv v8, v8, v6 -; RV32-NEXT: vxor.vv v8, v8, v5 -; RV32-NEXT: vxor.vv v8, v8, v4 -; RV32-NEXT: vxor.vv v8, v8, v3 -; RV32-NEXT: vxor.vv v8, v8, v2 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 
-; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vx v9, v8, a6 -; RV32-NEXT: vsll.vx v10, v8, a6 -; RV32-NEXT: vsrl.vx v11, v8, ra -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vand.vx v11, v11, a4 -; RV32-NEXT: vsrl.vi v13, v8, 24 -; RV32-NEXT: vand.vx v14, v8, a5 -; RV32-NEXT: vand.vx v13, v13, a5 -; RV32-NEXT: vsll.vx v12, v12, ra -; RV32-NEXT: vsrl.vi v15, v8, 8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v15, v15, v16 -; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vor.vv v11, v15, v13 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vsll.vi v13, v14, 24 -; RV32-NEXT: vor.vv v8, v13, v8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 352 -; 
RV32-NEXT: ret -; -; RV64-LABEL: clmulr_v1i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -224 -; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: li s11, 56 -; RV64-NEXT: li ra, 40 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: li t2, 255 -; RV64-NEXT: lui t6, 61681 -; RV64-NEXT: lui s0, 209715 -; RV64-NEXT: lui s1, 349525 -; RV64-NEXT: li s10, 16 -; RV64-NEXT: li s9, 32 -; RV64-NEXT: li s8, 64 -; RV64-NEXT: li s7, 128 -; RV64-NEXT: li s5, 256 -; RV64-NEXT: li t5, 512 -; RV64-NEXT: li t3, 1024 -; RV64-NEXT: li t0, 1 -; RV64-NEXT: lui s6, 1 -; RV64-NEXT: lui s4, 2 -; RV64-NEXT: lui t4, 4 -; RV64-NEXT: lui t1, 8 -; RV64-NEXT: lui a7, 32 -; RV64-NEXT: lui a6, 64 -; RV64-NEXT: lui a5, 128 -; RV64-NEXT: lui a4, 256 -; RV64-NEXT: lui a3, 512 -; RV64-NEXT: lui a2, 1024 -; RV64-NEXT: vsrl.vx v11, v8, s11 -; RV64-NEXT: vsrl.vx v12, v8, ra -; RV64-NEXT: addi t6, t6, -241 -; RV64-NEXT: addi s2, s0, 819 -; RV64-NEXT: addi s3, s1, 1365 -; RV64-NEXT: slli s1, t6, 32 -; RV64-NEXT: add s1, t6, s1 -; RV64-NEXT: slli t6, s2, 32 -; RV64-NEXT: add s2, s2, t6 -; RV64-NEXT: slli t6, s3, 32 -; RV64-NEXT: add s3, s3, t6 -; RV64-NEXT: addi s0, a0, -256 -; RV64-NEXT: lui a1, 16 -; RV64-NEXT: lui a0, 4080 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: slli t6, t2, 24 -; RV64-NEXT: vand.vx v13, v8, a0 -; RV64-NEXT: vsll.vx v14, v8, s11 -; RV64-NEXT: vand.vx v12, v12, s0 -; RV64-NEXT: vand.vx v9, v9, t6 -; RV64-NEXT: vsll.vi v13, v13, 24 -; RV64-NEXT: vand.vx v15, v8, t6 -; RV64-NEXT: vand.vx v8, v8, s0 -; RV64-NEXT: vor.vv v11, v12, v11 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v15, 8 -; RV64-NEXT: vsll.vx v8, v8, ra -; RV64-NEXT: vor.vv v9, v9, v11 -; RV64-NEXT: vor.vv v10, v13, v10 -; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vand.vx v8, v8, s1 -; RV64-NEXT: vand.vx v9, v9, s1 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vand.vx v8, v8, s2 -; RV64-NEXT: vand.vx v9, v9, s2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v8, v8, s3 -; RV64-NEXT: vand.vx v9, v9, s3 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vand.vx v9, v8, s10 -; RV64-NEXT: lui t2, 4096 -; RV64-NEXT: vand.vx v10, v8, s9 -; RV64-NEXT: lui s9, 8192 -; RV64-NEXT: vand.vx v11, v8, s8 -; RV64-NEXT: lui s8, 16384 -; RV64-NEXT: vand.vx v12, v8, s7 -; RV64-NEXT: lui s10, 32768 -; RV64-NEXT: vand.vx v13, v8, s5 -; RV64-NEXT: lui s11, 65536 -; RV64-NEXT: vand.vx v14, v8, t5 -; RV64-NEXT: 
lui t5, 131072 -; RV64-NEXT: vand.vx v15, v8, t3 -; RV64-NEXT: slli t3, t0, 11 -; RV64-NEXT: vand.vx v16, v8, t3 -; RV64-NEXT: lui t3, 262144 -; RV64-NEXT: vand.vx v17, v8, s6 -; RV64-NEXT: slli a0, t0, 31 -; RV64-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v18, v8, s4 -; RV64-NEXT: slli a0, t0, 32 -; RV64-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v19, v8, t4 -; RV64-NEXT: slli a0, t0, 33 -; RV64-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v20, v8, t1 -; RV64-NEXT: slli a0, t0, 34 -; RV64-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v21, v8, a1 -; RV64-NEXT: slli a0, t0, 35 -; RV64-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v22, v8, a7 -; RV64-NEXT: slli a0, t0, 36 -; RV64-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v23, v8, a6 -; RV64-NEXT: slli a0, t0, 37 -; RV64-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v24, v8, a5 -; RV64-NEXT: slli a0, t0, 38 -; RV64-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v25, v8, a4 -; RV64-NEXT: slli a0, t0, 39 -; RV64-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v26, v8, a3 -; RV64-NEXT: slli a0, t0, 40 -; RV64-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v27, v8, a2 -; RV64-NEXT: slli a0, t0, 41 -; RV64-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: lui a0, 2048 -; RV64-NEXT: vand.vx v28, v8, a0 -; RV64-NEXT: slli s5, t0, 42 -; RV64-NEXT: vand.vx v29, v8, t2 -; RV64-NEXT: slli s6, t0, 43 -; RV64-NEXT: vand.vx v30, v8, s9 -; RV64-NEXT: slli s7, t0, 44 -; RV64-NEXT: vand.vx v31, v8, s8 -; RV64-NEXT: slli s8, t0, 45 -; RV64-NEXT: vand.vx v7, v8, s10 -; RV64-NEXT: slli s9, t0, 46 -; RV64-NEXT: vand.vx v6, v8, s11 -; RV64-NEXT: slli s10, t0, 47 -; RV64-NEXT: vand.vx v5, v8, t5 -; RV64-NEXT: slli s11, t0, 48 -; RV64-NEXT: vand.vx v0, v8, t3 -; RV64-NEXT: slli ra, t0, 49 -; RV64-NEXT: slli t5, t0, 50 -; RV64-NEXT: slli t4, t0, 51 -; RV64-NEXT: slli t3, t0, 52 -; RV64-NEXT: slli t2, t0, 53 -; RV64-NEXT: slli t1, t0, 54 -; RV64-NEXT: slli a7, t0, 55 -; RV64-NEXT: slli a6, t0, 56 -; RV64-NEXT: slli a5, t0, 57 -; RV64-NEXT: slli a4, t0, 58 -; RV64-NEXT: slli a3, t0, 59 -; RV64-NEXT: slli a2, t0, 60 -; RV64-NEXT: slli a1, t0, 61 -; RV64-NEXT: slli t0, t0, 62 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vand.vi v4, v8, 2 -; RV64-NEXT: vand.vi v3, v8, 1 -; RV64-NEXT: vand.vi v2, v8, 4 -; RV64-NEXT: vand.vi v1, v8, 8 -; RV64-NEXT: vmul.vv v4, v8, v4 -; RV64-NEXT: sd t6, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 5 -; RV64-NEXT: add t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v4, v8, v3 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 5 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v4, v8, v2 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 5 -; RV64-NEXT: sub t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v4, v8, v1 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: 
add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v10 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v11 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v12 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v13 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v14 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v15 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v16 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v17 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v18 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; 
RV64-NEXT: vmul.vv v9, v8, v19 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v20 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v21 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 4 -; RV64-NEXT: add t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v22 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v23 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 4 -; RV64-NEXT: sub t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v24 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v25 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v26 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v27 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v28 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v29 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 3 -; RV64-NEXT: add t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v30 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v31 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 3 -; RV64-NEXT: sub t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: 
vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v7 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v6 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 2 -; RV64-NEXT: add t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v5 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v0 -; RV64-NEXT: csrr s4, vlenb -; RV64-NEXT: slli t6, s4, 1 -; RV64-NEXT: add s4, t6, s4 -; RV64-NEXT: ld t6, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s4, sp, s4 -; RV64-NEXT: addi s4, s4, 112 -; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr s4, vlenb -; RV64-NEXT: slli s4, s4, 1 -; RV64-NEXT: add s4, sp, s4 -; RV64-NEXT: addi s4, s4, 112 -; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s4, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr s4, vlenb -; RV64-NEXT: add s4, sp, s4 -; RV64-NEXT: addi s4, s4, 112 -; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s4, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: addi s4, sp, 112 -; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s4, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v3, v8, v9 -; RV64-NEXT: ld s4, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v4, v8, v9 -; RV64-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v5, v8, v9 -; RV64-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v6, v8, v9 -; RV64-NEXT: ld s4, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v7, v8, v9 -; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v31, v8, v9 -; RV64-NEXT: ld s4, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v30, v8, v9 -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v29, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s5 -; RV64-NEXT: vmul.vv v28, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s6 -; RV64-NEXT: vmul.vv v27, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s7 -; RV64-NEXT: vmul.vv v26, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s8 -; RV64-NEXT: vmul.vv v25, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s9 -; RV64-NEXT: vmul.vv v24, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s10 -; RV64-NEXT: vmul.vv v23, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s11 -; RV64-NEXT: vmul.vv v22, v8, v9 -; RV64-NEXT: vand.vx v9, v8, ra -; RV64-NEXT: vmul.vv v21, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t5 -; RV64-NEXT: vmul.vv v20, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t4 -; RV64-NEXT: vmul.vv v19, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t3 -; RV64-NEXT: vmul.vv v18, v8, v9 -; RV64-NEXT: 
vand.vx v9, v8, t2 -; RV64-NEXT: vmul.vv v17, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t1 -; RV64-NEXT: vmul.vv v16, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a7 -; RV64-NEXT: vmul.vv v15, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a6 -; RV64-NEXT: vmul.vv v14, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a5 -; RV64-NEXT: vmul.vv v13, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a4 -; RV64-NEXT: vmul.vv v12, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a3 -; RV64-NEXT: vmul.vv v11, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a2 -; RV64-NEXT: vmul.vv v10, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: vand.vx v0, v8, t0 -; RV64-NEXT: vmul.vv v0, v8, v0 -; RV64-NEXT: vand.vx v1, v8, a0 -; RV64-NEXT: vmul.vv v8, v8, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 5 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v2, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 5 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded 
Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 4 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 4 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, 
a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 2 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: addi a0, sp, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v1, v2 -; RV64-NEXT: vxor.vv v3, v2, v3 -; RV64-NEXT: vxor.vv v4, v3, v4 -; RV64-NEXT: vxor.vv v5, v4, v5 -; RV64-NEXT: vxor.vv v6, v5, v6 -; RV64-NEXT: vxor.vv v7, v6, v7 -; RV64-NEXT: vxor.vv v31, v7, v31 -; RV64-NEXT: vxor.vv v30, v31, v30 -; RV64-NEXT: vxor.vv v29, v30, v29 -; RV64-NEXT: vxor.vv v28, 
v29, v28 -; RV64-NEXT: vxor.vv v27, v28, v27 -; RV64-NEXT: vxor.vv v26, v27, v26 -; RV64-NEXT: vxor.vv v25, v26, v25 -; RV64-NEXT: vxor.vv v24, v25, v24 -; RV64-NEXT: vxor.vv v23, v24, v23 -; RV64-NEXT: vxor.vv v22, v23, v22 -; RV64-NEXT: vxor.vv v21, v22, v21 -; RV64-NEXT: vxor.vv v20, v21, v20 -; RV64-NEXT: vxor.vv v19, v20, v19 -; RV64-NEXT: vxor.vv v18, v19, v18 -; RV64-NEXT: vxor.vv v17, v18, v17 -; RV64-NEXT: vxor.vv v16, v17, v16 -; RV64-NEXT: vxor.vv v15, v16, v15 -; RV64-NEXT: vxor.vv v14, v15, v14 -; RV64-NEXT: vxor.vv v13, v14, v13 -; RV64-NEXT: vxor.vv v12, v13, v12 -; RV64-NEXT: vxor.vv v11, v12, v11 -; RV64-NEXT: vxor.vv v10, v11, v10 -; RV64-NEXT: vxor.vv v9, v10, v9 -; RV64-NEXT: vxor.vv v9, v9, v0 -; RV64-NEXT: vxor.vv v8, v9, v8 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: li a1, 40 -; RV64-NEXT: vsrl.vx v10, v8, a1 -; RV64-NEXT: vsrl.vi v11, v8, 24 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vand.vx v10, v10, s0 -; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vand.vx v10, v8, t6 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v11, v11, a2 -; RV64-NEXT: vand.vx v12, v12, t6 -; RV64-NEXT: vor.vv v11, v12, v11 -; RV64-NEXT: vand.vx v12, v8, a2 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vsll.vx v12, v8, a0 -; RV64-NEXT: vand.vx v8, v8, s0 -; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vor.vv v9, v11, v9 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vand.vx v8, v8, s1 -; RV64-NEXT: vand.vx v9, v9, s1 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vand.vx v8, v8, s2 -; RV64-NEXT: vand.vx v9, v9, s2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v8, v8, s3 -; RV64-NEXT: vand.vx v9, v9, s3 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 224 -; RV64-NEXT: ret - %a = call <1 x i64> @llvm.clmulr.v1i64(<1 x i64> %x, <1 x i64> %y) - ret <1 x i64> %a -} - -define <2 x i64> @clmulr_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { -; RV32-LABEL: clmulr_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -352 -; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 320(sp) 
# 4-byte Folded Spill -; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: lui s7, 1044480 -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: li s11, 1 -; RV32-NEXT: li s8, 2 -; RV32-NEXT: li s9, 4 -; RV32-NEXT: li s10, 8 -; RV32-NEXT: li a3, 16 -; RV32-NEXT: li a4, 32 -; RV32-NEXT: li a5, 64 -; RV32-NEXT: li a6, 128 -; RV32-NEXT: li ra, 256 -; RV32-NEXT: li a0, 512 -; RV32-NEXT: li a1, 1024 -; RV32-NEXT: lui a2, 1 -; RV32-NEXT: lui t0, 2 -; RV32-NEXT: lui t1, 4 -; RV32-NEXT: lui t2, 8 -; RV32-NEXT: lui t3, 16 -; RV32-NEXT: lui t4, 32 -; RV32-NEXT: lui t5, 64 -; RV32-NEXT: lui t6, 128 -; RV32-NEXT: lui s0, 256 -; RV32-NEXT: lui s1, 512 -; RV32-NEXT: lui s2, 1024 -; RV32-NEXT: lui s3, 2048 -; RV32-NEXT: lui s4, 4096 -; RV32-NEXT: lui s5, 8192 -; RV32-NEXT: lui s6, 16384 -; RV32-NEXT: sw s7, 272(sp) -; RV32-NEXT: lui s7, 32768 -; RV32-NEXT: sw zero, 276(sp) -; RV32-NEXT: sw a7, 264(sp) -; RV32-NEXT: sw zero, 268(sp) -; RV32-NEXT: sw zero, 256(sp) -; RV32-NEXT: sw s11, 260(sp) -; RV32-NEXT: sw zero, 248(sp) -; RV32-NEXT: sw s8, 252(sp) -; RV32-NEXT: lui s8, 65536 -; RV32-NEXT: sw zero, 240(sp) -; RV32-NEXT: sw s9, 244(sp) -; RV32-NEXT: lui s9, 131072 -; RV32-NEXT: sw zero, 232(sp) -; RV32-NEXT: sw s10, 236(sp) -; RV32-NEXT: lui s10, 262144 -; RV32-NEXT: sw zero, 224(sp) -; RV32-NEXT: sw a3, 228(sp) -; RV32-NEXT: sw zero, 216(sp) -; RV32-NEXT: sw a4, 220(sp) -; RV32-NEXT: sw zero, 208(sp) -; RV32-NEXT: sw a5, 212(sp) -; RV32-NEXT: sw zero, 200(sp) -; RV32-NEXT: sw a6, 204(sp) -; RV32-NEXT: sw zero, 192(sp) -; RV32-NEXT: sw ra, 196(sp) -; RV32-NEXT: sw zero, 184(sp) -; RV32-NEXT: sw a0, 188(sp) -; RV32-NEXT: sw zero, 176(sp) -; RV32-NEXT: sw a1, 180(sp) -; RV32-NEXT: slli s11, s11, 11 -; RV32-NEXT: sw zero, 168(sp) -; RV32-NEXT: sw s11, 172(sp) -; RV32-NEXT: sw zero, 160(sp) -; RV32-NEXT: sw a2, 164(sp) -; RV32-NEXT: sw zero, 152(sp) -; RV32-NEXT: sw t0, 156(sp) -; RV32-NEXT: sw zero, 144(sp) -; RV32-NEXT: sw t1, 148(sp) -; RV32-NEXT: sw zero, 136(sp) -; RV32-NEXT: sw t2, 140(sp) -; RV32-NEXT: sw zero, 128(sp) -; RV32-NEXT: sw t3, 132(sp) -; RV32-NEXT: sw zero, 120(sp) -; RV32-NEXT: sw t4, 124(sp) -; RV32-NEXT: sw zero, 112(sp) -; RV32-NEXT: sw t5, 116(sp) -; RV32-NEXT: sw zero, 104(sp) -; RV32-NEXT: sw t6, 108(sp) -; RV32-NEXT: sw zero, 96(sp) -; RV32-NEXT: sw s0, 100(sp) -; RV32-NEXT: sw zero, 88(sp) -; RV32-NEXT: sw s1, 92(sp) -; RV32-NEXT: sw zero, 80(sp) -; RV32-NEXT: sw s2, 84(sp) -; RV32-NEXT: sw zero, 72(sp) -; RV32-NEXT: sw s3, 76(sp) -; RV32-NEXT: sw zero, 64(sp) -; RV32-NEXT: sw s4, 68(sp) -; RV32-NEXT: sw zero, 56(sp) -; RV32-NEXT: sw s5, 60(sp) -; RV32-NEXT: sw zero, 48(sp) -; RV32-NEXT: sw s6, 52(sp) -; RV32-NEXT: sw zero, 40(sp) -; RV32-NEXT: sw s7, 44(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw s8, 36(sp) -; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw s9, 28(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw s10, 20(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: sw a7, 12(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v3, a0 -; 
RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vmv.v.x v2, a0 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vmv.v.x v1, a0 -; RV32-NEXT: addi a0, sp, 272 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v0, (a0), zero -; RV32-NEXT: addi a0, sp, 264 -; RV32-NEXT: vlse64.v v13, (a0), zero -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vlse64.v v14, (a0), zero -; RV32-NEXT: addi a0, sp, 248 -; RV32-NEXT: vlse64.v v15, (a0), zero -; RV32-NEXT: addi a0, sp, 240 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: addi a0, sp, 232 -; RV32-NEXT: vlse64.v v17, (a0), zero -; RV32-NEXT: addi a0, sp, 224 -; RV32-NEXT: vlse64.v v18, (a0), zero -; RV32-NEXT: addi a0, sp, 216 -; RV32-NEXT: vlse64.v v19, (a0), zero -; RV32-NEXT: addi a0, sp, 208 -; RV32-NEXT: vlse64.v v20, (a0), zero -; RV32-NEXT: addi a0, sp, 200 -; RV32-NEXT: vlse64.v v21, (a0), zero -; RV32-NEXT: addi a0, sp, 192 -; RV32-NEXT: vlse64.v v22, (a0), zero -; RV32-NEXT: addi a0, sp, 184 -; RV32-NEXT: vlse64.v v23, (a0), zero -; RV32-NEXT: addi a0, sp, 176 -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: addi a0, sp, 168 -; RV32-NEXT: vlse64.v v25, (a0), zero -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: vlse64.v v26, (a0), zero -; RV32-NEXT: addi a0, sp, 152 -; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: addi a0, sp, 144 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: addi a0, sp, 136 -; RV32-NEXT: vlse64.v v29, (a0), zero -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: vlse64.v v30, (a0), zero -; RV32-NEXT: addi a0, sp, 120 -; RV32-NEXT: vlse64.v v31, (a0), zero -; RV32-NEXT: addi a0, sp, 112 -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: addi a0, sp, 104 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: addi a0, sp, 96 -; RV32-NEXT: vlse64.v v5, (a0), zero -; RV32-NEXT: addi a0, sp, 88 -; RV32-NEXT: vlse64.v v4, (a0), zero -; RV32-NEXT: li a6, 56 -; RV32-NEXT: vsrl.vi v27, v8, 24 -; RV32-NEXT: vsrl.vx v28, v8, a6 -; RV32-NEXT: li ra, 40 -; RV32-NEXT: vsrl.vx v7, v8, ra -; RV32-NEXT: vsll.vx v6, v8, a6 -; RV32-NEXT: addi a4, t3, -256 -; RV32-NEXT: vand.vx v7, v7, a4 -; RV32-NEXT: vor.vv v28, v7, v28 -; RV32-NEXT: vand.vx v7, v8, a4 -; RV32-NEXT: vsll.vx v7, v7, ra -; RV32-NEXT: vor.vv v7, v6, v7 -; RV32-NEXT: vsrl.vi v6, v8, 8 -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v27, v27, a5 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v6, v6, v0 -; RV32-NEXT: vor.vv v27, v6, v27 -; RV32-NEXT: addi a3, sp, 80 -; RV32-NEXT: vlse64.v v6, (a3), zero -; RV32-NEXT: vor.vv v27, v27, v28 -; RV32-NEXT: vand.vx v28, v8, a5 -; RV32-NEXT: vsll.vi v28, v28, 24 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v28, v8 -; RV32-NEXT: addi a3, sp, 72 -; RV32-NEXT: vlse64.v v28, (a3), zero -; RV32-NEXT: vor.vv v8, v7, v8 -; RV32-NEXT: addi a3, sp, 64 -; RV32-NEXT: vlse64.v v7, (a3), zero -; RV32-NEXT: vor.vv v8, v8, v27 -; RV32-NEXT: vsrl.vi v27, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill -; 
RV32-NEXT: vand.vv v8, v8, v3 -; RV32-NEXT: vand.vv v27, v27, v3 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v27, v8 -; RV32-NEXT: vsrl.vi v27, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v2 -; RV32-NEXT: vand.vv v27, v27, v2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v27, v8 -; RV32-NEXT: vsrl.vi v27, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v1 -; RV32-NEXT: vand.vv v27, v27, v1 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v27, v8 -; RV32-NEXT: addi a3, sp, 56 -; RV32-NEXT: vlse64.v v27, (a3), zero -; RV32-NEXT: vand.vv v13, v8, v13 -; RV32-NEXT: vand.vv v14, v8, v14 -; RV32-NEXT: vand.vv v15, v8, v15 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vv v17, v8, v17 -; RV32-NEXT: vand.vv v18, v8, v18 -; RV32-NEXT: vand.vv v19, v8, v19 -; RV32-NEXT: vand.vv v20, v8, v20 -; RV32-NEXT: vand.vv v21, v8, v21 -; RV32-NEXT: vand.vv v22, v8, v22 -; RV32-NEXT: vand.vv v23, v8, v23 -; RV32-NEXT: vand.vv v24, v8, v24 -; RV32-NEXT: vand.vv v25, v8, v25 -; RV32-NEXT: vand.vv v26, v8, v26 -; RV32-NEXT: vand.vv v3, v8, v9 -; RV32-NEXT: vand.vv v2, v8, v10 -; RV32-NEXT: vand.vv v29, v8, v29 -; RV32-NEXT: vand.vv v30, v8, v30 -; RV32-NEXT: vand.vv v31, v8, v31 -; RV32-NEXT: vand.vv v0, v8, v11 -; RV32-NEXT: vand.vv v9, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v5, v8, v5 -; RV32-NEXT: vand.vv v4, v8, v4 -; RV32-NEXT: vand.vv v6, v8, v6 -; RV32-NEXT: vand.vv v9, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: addi a0, sp, 40 -; RV32-NEXT: vlse64.v v9, (a3), zero -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vand.vv v11, v8, v7 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v11, v8, v27 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; 
RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v9, (a2), zero -; RV32-NEXT: vlse64.v v10, (a3), zero -; RV32-NEXT: vlse64.v v11, (a1), zero -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vand.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v11 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vv v9, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 4 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vi v9, v8, 8 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 16 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, 
v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 64 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 128 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 256 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 512 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: li a0, 1024 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s11 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: vand.vx v9, v8, a0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, 
a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t3 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t4 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t5 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, t6 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s0 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s1 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s2 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s3 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s4 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s5 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb 
-; RV32-NEXT: slli a1, a0, 2 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s6 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s7 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s8 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v9, v8, s9 -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vand.vx v1, v8, s10 -; RV32-NEXT: vmul.vv v1, v8, v1 -; RV32-NEXT: vmul.vv v9, v8, v13 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: vmul.vv v10, v8, v14 -; RV32-NEXT: vmul.vv v11, v8, v15 -; RV32-NEXT: vmul.vv v12, v8, v16 -; RV32-NEXT: vmul.vv v13, v8, v17 -; RV32-NEXT: vmul.vv v14, v8, v18 -; RV32-NEXT: vmul.vv v15, v8, v19 -; RV32-NEXT: vmul.vv v16, v8, v20 -; RV32-NEXT: vmul.vv v17, v8, v21 -; RV32-NEXT: vmul.vv v18, v8, v22 -; RV32-NEXT: vmul.vv v19, v8, v23 -; RV32-NEXT: vmul.vv v20, v8, v24 -; RV32-NEXT: vmul.vv v21, v8, v25 -; RV32-NEXT: vmul.vv v22, v8, v26 -; RV32-NEXT: vmul.vv v23, v8, v3 -; RV32-NEXT: vmul.vv v24, v8, v2 -; RV32-NEXT: vmul.vv v25, v8, v29 -; RV32-NEXT: vmul.vv v26, v8, v30 -; RV32-NEXT: vmul.vv v27, v8, v31 -; RV32-NEXT: vmul.vv v28, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v29, v8, v29 -; RV32-NEXT: vmul.vv v30, v8, v5 -; RV32-NEXT: vmul.vv v31, v8, v4 -; RV32-NEXT: vmul.vv v7, v8, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v6, v8, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v5, v8, v5 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v4, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 
-; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v3, v8, v3 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v2, v8, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v0, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 5 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v9, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vi v8, v8, 0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte 
Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # 
vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 4 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 3 -; RV32-NEXT: sub a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 2 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, 
v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a1, a0, 1 -; RV32-NEXT: add a0, a1, a0 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vxor.vv v8, v8, v1 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: vxor.vv v8, v8, v11 -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vxor.vv v8, v8, v13 -; RV32-NEXT: vxor.vv v8, v8, v14 -; RV32-NEXT: vxor.vv v8, v8, v15 -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vxor.vv v8, v8, v17 -; RV32-NEXT: vxor.vv v8, v8, v18 -; RV32-NEXT: vxor.vv v8, v8, v19 -; RV32-NEXT: vxor.vv v8, v8, v20 -; RV32-NEXT: vxor.vv v8, v8, v21 -; RV32-NEXT: vxor.vv v8, v8, v22 -; RV32-NEXT: vxor.vv v8, v8, v23 -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vxor.vv v8, v8, v25 -; RV32-NEXT: vxor.vv v8, v8, v26 -; RV32-NEXT: vxor.vv v8, v8, v27 -; RV32-NEXT: vxor.vv v8, v8, v28 -; RV32-NEXT: vxor.vv v8, v8, v29 -; RV32-NEXT: vxor.vv v8, v8, v30 -; RV32-NEXT: vxor.vv v8, v8, v31 -; RV32-NEXT: vxor.vv v8, v8, v7 -; RV32-NEXT: vxor.vv v8, v8, v6 -; RV32-NEXT: vxor.vv v8, v8, v5 -; RV32-NEXT: vxor.vv v8, v8, v4 -; RV32-NEXT: vxor.vv v8, v8, v3 -; RV32-NEXT: vxor.vv v8, v8, v2 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vx v9, v8, a6 -; RV32-NEXT: vsll.vx v10, v8, a6 -; RV32-NEXT: vsrl.vx v11, v8, ra -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vand.vx v11, v11, a4 -; RV32-NEXT: vsrl.vi v13, v8, 24 -; RV32-NEXT: vand.vx v14, v8, a5 -; RV32-NEXT: vand.vx v13, v13, a5 -; RV32-NEXT: vsll.vx v12, v12, ra -; RV32-NEXT: vsrl.vi v15, v8, 8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v15, v15, v16 -; 
RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vor.vv v11, v15, v13 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vsll.vi v13, v14, 24 -; RV32-NEXT: vor.vv v8, v13, v8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v9, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 352 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -224 -; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; 
RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: li s11, 56 -; RV64-NEXT: li ra, 40 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vsrl.vi v10, v8, 24 -; RV64-NEXT: vsrl.vi v9, v8, 8 -; RV64-NEXT: li t2, 255 -; RV64-NEXT: lui t6, 61681 -; RV64-NEXT: lui s0, 209715 -; RV64-NEXT: lui s1, 349525 -; RV64-NEXT: li s10, 16 -; RV64-NEXT: li s9, 32 -; RV64-NEXT: li s8, 64 -; RV64-NEXT: li s7, 128 -; RV64-NEXT: li s5, 256 -; RV64-NEXT: li t5, 512 -; RV64-NEXT: li t3, 1024 -; RV64-NEXT: li t0, 1 -; RV64-NEXT: lui s6, 1 -; RV64-NEXT: lui s4, 2 -; RV64-NEXT: lui t4, 4 -; RV64-NEXT: lui t1, 8 -; RV64-NEXT: lui a7, 32 -; RV64-NEXT: lui a6, 64 -; RV64-NEXT: lui a5, 128 -; RV64-NEXT: lui a4, 256 -; RV64-NEXT: lui a3, 512 -; RV64-NEXT: lui a2, 1024 -; RV64-NEXT: vsrl.vx v11, v8, s11 -; RV64-NEXT: vsrl.vx v12, v8, ra -; RV64-NEXT: addi t6, t6, -241 -; RV64-NEXT: addi s2, s0, 819 -; RV64-NEXT: addi s3, s1, 1365 -; RV64-NEXT: slli s1, t6, 32 -; RV64-NEXT: add s1, t6, s1 -; RV64-NEXT: slli t6, s2, 32 -; RV64-NEXT: add s2, s2, t6 -; RV64-NEXT: slli t6, s3, 32 -; RV64-NEXT: add s3, s3, t6 -; RV64-NEXT: addi s0, a0, -256 -; RV64-NEXT: lui a1, 16 -; RV64-NEXT: lui a0, 4080 -; RV64-NEXT: vand.vx v10, v10, a0 -; RV64-NEXT: slli t6, t2, 24 -; RV64-NEXT: vand.vx v13, v8, a0 -; RV64-NEXT: vsll.vx v14, v8, s11 -; RV64-NEXT: vand.vx v12, v12, s0 -; RV64-NEXT: vand.vx v9, v9, t6 -; RV64-NEXT: vsll.vi v13, v13, 24 -; RV64-NEXT: vand.vx v15, v8, t6 -; RV64-NEXT: vand.vx v8, v8, s0 -; RV64-NEXT: vor.vv v11, v12, v11 -; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vsll.vi v10, v15, 8 -; RV64-NEXT: vsll.vx v8, v8, ra -; RV64-NEXT: vor.vv v9, v9, v11 -; RV64-NEXT: vor.vv v10, v13, v10 -; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vand.vx v8, v8, s1 -; RV64-NEXT: vand.vx v9, v9, s1 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vand.vx v8, v8, s2 -; RV64-NEXT: vand.vx v9, v9, s2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v8, v8, s3 -; RV64-NEXT: vand.vx v9, v9, s3 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vand.vx v9, v8, s10 -; RV64-NEXT: lui t2, 4096 -; RV64-NEXT: vand.vx v10, v8, s9 -; RV64-NEXT: lui s9, 8192 -; RV64-NEXT: vand.vx v11, v8, s8 -; RV64-NEXT: lui s8, 16384 -; RV64-NEXT: vand.vx v12, v8, s7 -; RV64-NEXT: lui s10, 32768 -; RV64-NEXT: vand.vx v13, v8, s5 -; RV64-NEXT: lui s11, 65536 -; RV64-NEXT: vand.vx v14, v8, t5 -; RV64-NEXT: lui t5, 131072 -; RV64-NEXT: vand.vx v15, v8, t3 -; RV64-NEXT: slli t3, t0, 11 -; RV64-NEXT: vand.vx v16, v8, t3 -; RV64-NEXT: lui t3, 262144 -; RV64-NEXT: vand.vx v17, v8, s6 -; RV64-NEXT: slli a0, t0, 31 -; RV64-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v18, v8, s4 -; RV64-NEXT: slli a0, t0, 32 -; RV64-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v19, v8, t4 -; RV64-NEXT: slli a0, t0, 33 -; RV64-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v20, v8, t1 -; RV64-NEXT: slli a0, t0, 34 -; RV64-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v21, v8, a1 -; RV64-NEXT: slli a0, t0, 35 -; RV64-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v22, v8, a7 -; RV64-NEXT: slli a0, t0, 36 -; RV64-NEXT: sd a0, 56(sp) # 8-byte Folded 
Spill -; RV64-NEXT: vand.vx v23, v8, a6 -; RV64-NEXT: slli a0, t0, 37 -; RV64-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v24, v8, a5 -; RV64-NEXT: slli a0, t0, 38 -; RV64-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v25, v8, a4 -; RV64-NEXT: slli a0, t0, 39 -; RV64-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v26, v8, a3 -; RV64-NEXT: slli a0, t0, 40 -; RV64-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v27, v8, a2 -; RV64-NEXT: slli a0, t0, 41 -; RV64-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: lui a0, 2048 -; RV64-NEXT: vand.vx v28, v8, a0 -; RV64-NEXT: slli s5, t0, 42 -; RV64-NEXT: vand.vx v29, v8, t2 -; RV64-NEXT: slli s6, t0, 43 -; RV64-NEXT: vand.vx v30, v8, s9 -; RV64-NEXT: slli s7, t0, 44 -; RV64-NEXT: vand.vx v31, v8, s8 -; RV64-NEXT: slli s8, t0, 45 -; RV64-NEXT: vand.vx v7, v8, s10 -; RV64-NEXT: slli s9, t0, 46 -; RV64-NEXT: vand.vx v6, v8, s11 -; RV64-NEXT: slli s10, t0, 47 -; RV64-NEXT: vand.vx v5, v8, t5 -; RV64-NEXT: slli s11, t0, 48 -; RV64-NEXT: vand.vx v0, v8, t3 -; RV64-NEXT: slli ra, t0, 49 -; RV64-NEXT: slli t5, t0, 50 -; RV64-NEXT: slli t4, t0, 51 -; RV64-NEXT: slli t3, t0, 52 -; RV64-NEXT: slli t2, t0, 53 -; RV64-NEXT: slli t1, t0, 54 -; RV64-NEXT: slli a7, t0, 55 -; RV64-NEXT: slli a6, t0, 56 -; RV64-NEXT: slli a5, t0, 57 -; RV64-NEXT: slli a4, t0, 58 -; RV64-NEXT: slli a3, t0, 59 -; RV64-NEXT: slli a2, t0, 60 -; RV64-NEXT: slli a1, t0, 61 -; RV64-NEXT: slli t0, t0, 62 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vand.vi v4, v8, 2 -; RV64-NEXT: vand.vi v3, v8, 1 -; RV64-NEXT: vand.vi v2, v8, 4 -; RV64-NEXT: vand.vi v1, v8, 8 -; RV64-NEXT: vmul.vv v4, v8, v4 -; RV64-NEXT: sd t6, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 5 -; RV64-NEXT: add t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v4, v8, v3 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 5 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v4, v8, v2 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 5 -; RV64-NEXT: sub t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v4, v8, v1 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v4, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v10 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 
8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v11 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v12 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v13 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v14 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v15 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v16 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v17 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v18 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v19 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v20 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v21 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 4 -; RV64-NEXT: add t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, 
(t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v22 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v23 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 4 -; RV64-NEXT: sub t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v24 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v25 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v26 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v27 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add s4, s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v28 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v29 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 3 -; RV64-NEXT: add t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v30 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v31 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 3 -; RV64-NEXT: sub t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v7 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: mv s4, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s4 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v6 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli s4, t6, 2 -; RV64-NEXT: add t6, s4, t6 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, v8, v5 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 2 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 112 -; RV64-NEXT: vs1r.v v9, (t6) # vscale x 8-byte Folded Spill -; RV64-NEXT: vmul.vv v9, 
v8, v0 -; RV64-NEXT: csrr s4, vlenb -; RV64-NEXT: slli t6, s4, 1 -; RV64-NEXT: add s4, t6, s4 -; RV64-NEXT: ld t6, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s4, sp, s4 -; RV64-NEXT: addi s4, s4, 112 -; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s4, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr s4, vlenb -; RV64-NEXT: slli s4, s4, 1 -; RV64-NEXT: add s4, sp, s4 -; RV64-NEXT: addi s4, s4, 112 -; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s4, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: csrr s4, vlenb -; RV64-NEXT: add s4, sp, s4 -; RV64-NEXT: addi s4, s4, 112 -; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s4, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: addi s4, sp, 112 -; RV64-NEXT: vs1r.v v9, (s4) # vscale x 8-byte Folded Spill -; RV64-NEXT: ld s4, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v3, v8, v9 -; RV64-NEXT: ld s4, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v4, v8, v9 -; RV64-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v5, v8, v9 -; RV64-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v6, v8, v9 -; RV64-NEXT: ld s4, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v7, v8, v9 -; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v31, v8, v9 -; RV64-NEXT: ld s4, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v30, v8, v9 -; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v9, v8, s4 -; RV64-NEXT: vmul.vv v29, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s5 -; RV64-NEXT: vmul.vv v28, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s6 -; RV64-NEXT: vmul.vv v27, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s7 -; RV64-NEXT: vmul.vv v26, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s8 -; RV64-NEXT: vmul.vv v25, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s9 -; RV64-NEXT: vmul.vv v24, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s10 -; RV64-NEXT: vmul.vv v23, v8, v9 -; RV64-NEXT: vand.vx v9, v8, s11 -; RV64-NEXT: vmul.vv v22, v8, v9 -; RV64-NEXT: vand.vx v9, v8, ra -; RV64-NEXT: vmul.vv v21, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t5 -; RV64-NEXT: vmul.vv v20, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t4 -; RV64-NEXT: vmul.vv v19, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t3 -; RV64-NEXT: vmul.vv v18, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t2 -; RV64-NEXT: vmul.vv v17, v8, v9 -; RV64-NEXT: vand.vx v9, v8, t1 -; RV64-NEXT: vmul.vv v16, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a7 -; RV64-NEXT: vmul.vv v15, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a6 -; RV64-NEXT: vmul.vv v14, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a5 -; RV64-NEXT: vmul.vv v13, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a4 -; RV64-NEXT: vmul.vv v12, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a3 -; RV64-NEXT: vmul.vv v11, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a2 -; RV64-NEXT: vmul.vv v10, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 -; RV64-NEXT: vmul.vv v9, v8, v9 -; RV64-NEXT: vand.vx v0, v8, t0 -; RV64-NEXT: vmul.vv v0, v8, v0 -; RV64-NEXT: vand.vx v1, v8, a0 -; RV64-NEXT: vmul.vv v8, v8, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 5 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add 
a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v2, v1 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 5 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; 
RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 4 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 4 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; 
RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 3 -; RV64-NEXT: sub a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 2 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a1, a0, 1 -; RV64-NEXT: add a0, a1, a0 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v1, v1, v2 -; RV64-NEXT: addi a0, sp, 112 -; RV64-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v1, v2 -; RV64-NEXT: vxor.vv v3, v2, v3 -; RV64-NEXT: vxor.vv v4, v3, v4 -; RV64-NEXT: vxor.vv v5, v4, v5 -; RV64-NEXT: vxor.vv v6, v5, v6 -; RV64-NEXT: vxor.vv v7, v6, v7 -; RV64-NEXT: vxor.vv v31, v7, v31 -; RV64-NEXT: vxor.vv v30, v31, v30 -; RV64-NEXT: vxor.vv v29, v30, v29 -; RV64-NEXT: vxor.vv v28, v29, v28 -; RV64-NEXT: vxor.vv v27, v28, v27 -; RV64-NEXT: vxor.vv v26, v27, v26 -; RV64-NEXT: vxor.vv v25, v26, v25 -; RV64-NEXT: vxor.vv v24, v25, v24 -; RV64-NEXT: vxor.vv v23, v24, v23 -; RV64-NEXT: vxor.vv v22, v23, v22 -; RV64-NEXT: vxor.vv v21, v22, v21 -; RV64-NEXT: vxor.vv v20, v21, v20 -; RV64-NEXT: vxor.vv v19, v20, v19 -; RV64-NEXT: vxor.vv v18, v19, v18 -; RV64-NEXT: vxor.vv v17, v18, v17 -; RV64-NEXT: vxor.vv v16, v17, v16 -; RV64-NEXT: vxor.vv v15, v16, v15 -; RV64-NEXT: vxor.vv v14, v15, v14 -; RV64-NEXT: vxor.vv v13, v14, v13 -; RV64-NEXT: vxor.vv v12, v13, v12 -; RV64-NEXT: vxor.vv v11, v12, v11 -; RV64-NEXT: vxor.vv v10, v11, v10 -; RV64-NEXT: vxor.vv v9, v10, v9 -; RV64-NEXT: vxor.vv v9, v9, v0 -; RV64-NEXT: vxor.vv v8, v9, v8 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v9, v8, a0 -; RV64-NEXT: li 
a1, 40 -; RV64-NEXT: vsrl.vx v10, v8, a1 -; RV64-NEXT: vsrl.vi v11, v8, 24 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: vand.vx v10, v10, s0 -; RV64-NEXT: vor.vv v9, v10, v9 -; RV64-NEXT: vand.vx v10, v8, t6 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v11, v11, a2 -; RV64-NEXT: vand.vx v12, v12, t6 -; RV64-NEXT: vor.vv v11, v12, v11 -; RV64-NEXT: vand.vx v12, v8, a2 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v12, v12, 24 -; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vsll.vx v12, v8, a0 -; RV64-NEXT: vand.vx v8, v8, s0 -; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vor.vv v9, v11, v9 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: vsrl.vi v9, v8, 4 -; RV64-NEXT: vand.vx v8, v8, s1 -; RV64-NEXT: vand.vx v9, v9, s1 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 2 -; RV64-NEXT: vand.vx v8, v8, s2 -; RV64-NEXT: vand.vx v9, v9, s2 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v8, v8, s3 -; RV64-NEXT: vand.vx v9, v9, s3 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 168(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 224 -; RV64-NEXT: ret - %a = call <2 x i64> @llvm.clmulr.v2i64(<2 x i64> %x, <2 x i64> %y) - ret <2 x i64> %a -} - -define <4 x i64> @clmulr_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; RV32-LABEL: clmulr_v4i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -352 -; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: lui s7, 1044480 -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: li a1, 1 -; RV32-NEXT: li s8, 2 -; RV32-NEXT: li s9, 4 -; RV32-NEXT: li s10, 8 
-; RV32-NEXT: li a3, 16 -; RV32-NEXT: li a4, 32 -; RV32-NEXT: li a5, 64 -; RV32-NEXT: li a6, 128 -; RV32-NEXT: li s11, 256 -; RV32-NEXT: li ra, 512 -; RV32-NEXT: li a0, 1024 -; RV32-NEXT: lui a2, 1 -; RV32-NEXT: lui t0, 2 -; RV32-NEXT: lui t1, 4 -; RV32-NEXT: lui t2, 8 -; RV32-NEXT: lui t3, 16 -; RV32-NEXT: lui t4, 32 -; RV32-NEXT: lui t5, 64 -; RV32-NEXT: lui t6, 128 -; RV32-NEXT: lui s0, 256 -; RV32-NEXT: lui s1, 512 -; RV32-NEXT: lui s2, 1024 -; RV32-NEXT: lui s3, 2048 -; RV32-NEXT: lui s4, 4096 -; RV32-NEXT: lui s5, 8192 -; RV32-NEXT: lui s6, 16384 -; RV32-NEXT: sw s7, 272(sp) -; RV32-NEXT: lui s7, 32768 -; RV32-NEXT: sw zero, 276(sp) -; RV32-NEXT: sw a7, 264(sp) -; RV32-NEXT: sw zero, 268(sp) -; RV32-NEXT: sw zero, 256(sp) -; RV32-NEXT: sw a1, 260(sp) -; RV32-NEXT: sw zero, 248(sp) -; RV32-NEXT: sw s8, 252(sp) -; RV32-NEXT: lui s8, 65536 -; RV32-NEXT: sw zero, 240(sp) -; RV32-NEXT: sw s9, 244(sp) -; RV32-NEXT: lui s9, 131072 -; RV32-NEXT: sw zero, 232(sp) -; RV32-NEXT: sw s10, 236(sp) -; RV32-NEXT: lui s10, 262144 -; RV32-NEXT: sw zero, 224(sp) -; RV32-NEXT: sw a3, 228(sp) -; RV32-NEXT: sw zero, 216(sp) -; RV32-NEXT: sw a4, 220(sp) -; RV32-NEXT: sw zero, 208(sp) -; RV32-NEXT: sw a5, 212(sp) -; RV32-NEXT: sw zero, 200(sp) -; RV32-NEXT: sw a6, 204(sp) -; RV32-NEXT: sw zero, 192(sp) -; RV32-NEXT: sw s11, 196(sp) -; RV32-NEXT: sw zero, 184(sp) -; RV32-NEXT: sw ra, 188(sp) -; RV32-NEXT: sw zero, 176(sp) -; RV32-NEXT: sw a0, 180(sp) -; RV32-NEXT: slli a5, a1, 11 -; RV32-NEXT: sw zero, 168(sp) -; RV32-NEXT: sw a5, 172(sp) -; RV32-NEXT: sw zero, 160(sp) -; RV32-NEXT: sw a2, 164(sp) -; RV32-NEXT: sw zero, 152(sp) -; RV32-NEXT: sw t0, 156(sp) -; RV32-NEXT: sw zero, 144(sp) -; RV32-NEXT: sw t1, 148(sp) -; RV32-NEXT: sw zero, 136(sp) -; RV32-NEXT: sw t2, 140(sp) -; RV32-NEXT: sw zero, 128(sp) -; RV32-NEXT: sw t3, 132(sp) -; RV32-NEXT: sw zero, 120(sp) -; RV32-NEXT: sw t4, 124(sp) -; RV32-NEXT: sw zero, 112(sp) -; RV32-NEXT: sw t5, 116(sp) -; RV32-NEXT: sw zero, 104(sp) -; RV32-NEXT: sw t6, 108(sp) -; RV32-NEXT: sw zero, 96(sp) -; RV32-NEXT: sw s0, 100(sp) -; RV32-NEXT: sw zero, 88(sp) -; RV32-NEXT: sw s1, 92(sp) -; RV32-NEXT: sw zero, 80(sp) -; RV32-NEXT: sw s2, 84(sp) -; RV32-NEXT: sw zero, 72(sp) -; RV32-NEXT: sw s3, 76(sp) -; RV32-NEXT: sw zero, 64(sp) -; RV32-NEXT: sw s4, 68(sp) -; RV32-NEXT: sw zero, 56(sp) -; RV32-NEXT: sw s5, 60(sp) -; RV32-NEXT: sw zero, 48(sp) -; RV32-NEXT: sw s6, 52(sp) -; RV32-NEXT: sw zero, 40(sp) -; RV32-NEXT: sw s7, 44(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw s8, 36(sp) -; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw s9, 28(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw s10, 20(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: sw a7, 12(sp) -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v4, a0 -; RV32-NEXT: lui a0, 209715 -; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: vmv.v.x v2, a0 -; RV32-NEXT: lui a0, 349525 -; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: vmv.v.x v0, a0 -; RV32-NEXT: addi a0, sp, 272 -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v6, (a0), zero -; RV32-NEXT: addi a0, sp, 264 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 256 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: addi a0, sp, 248 -; RV32-NEXT: vlse64.v v14, (a0), zero -; RV32-NEXT: addi a0, sp, 240 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: addi a0, sp, 232 -; RV32-NEXT: vlse64.v v18, (a0), zero -; RV32-NEXT: addi a0, sp, 224 -; RV32-NEXT: vlse64.v v20, (a0), zero -; RV32-NEXT: addi a0, sp, 216 -; RV32-NEXT: vlse64.v v22, (a0), zero -; RV32-NEXT: li ra, 56 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vsrl.vx v26, v8, ra -; RV32-NEXT: li s11, 40 -; RV32-NEXT: vsrl.vx v28, v8, s11 -; RV32-NEXT: vsll.vx v30, v8, ra -; RV32-NEXT: addi a4, t3, -256 -; RV32-NEXT: vand.vx v28, v28, a4 -; RV32-NEXT: vor.vv v26, v28, v26 -; RV32-NEXT: vand.vx v28, v8, a4 -; RV32-NEXT: vsll.vx v28, v28, s11 -; RV32-NEXT: vor.vv v30, v30, v28 -; RV32-NEXT: vsrl.vi v28, v8, 8 -; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v24, a6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v28, v28, v6 -; RV32-NEXT: vor.vv v28, v28, v24 -; RV32-NEXT: addi a3, sp, 208 -; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: vor.vv v10, v28, v26 -; RV32-NEXT: vand.vx v26, v8, a6 -; RV32-NEXT: vsll.vi v26, v26, 24 -; RV32-NEXT: vand.vv v8, v8, v6 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v26, v8 -; RV32-NEXT: addi a3, sp, 200 -; RV32-NEXT: vlse64.v v28, (a3), zero -; RV32-NEXT: vor.vv v8, v30, v8 -; RV32-NEXT: addi a3, sp, 192 -; RV32-NEXT: vlse64.v v26, (a3), zero -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v30, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v4 -; RV32-NEXT: vand.vv v30, v30, v4 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v30, v8 -; RV32-NEXT: vsrl.vi v30, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v2, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v2 -; RV32-NEXT: vand.vv v30, v30, v2 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v30, v8 -; RV32-NEXT: vsrl.vi v30, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v0, (a0) # vscale x 16-byte 
Folded Spill -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vand.vv v30, v30, v0 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v30, v8 -; RV32-NEXT: addi a3, sp, 184 -; RV32-NEXT: vlse64.v v30, (a3), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v6, v8, v10 -; RV32-NEXT: vand.vv v4, v8, v12 -; RV32-NEXT: vand.vv v2, v8, v14 -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vand.vv v10, v8, v18 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v22 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v28, v8, v28 -; RV32-NEXT: addi a3, sp, 176 -; RV32-NEXT: addi a0, sp, 168 -; RV32-NEXT: vlse64.v v10, (a3), zero -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vand.vv v14, v8, v26 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v14, v8, v30 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: 
add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a2, sp, 160 -; RV32-NEXT: addi a3, sp, 152 -; RV32-NEXT: addi a1, sp, 144 -; RV32-NEXT: addi a0, sp, 136 -; RV32-NEXT: vlse64.v v10, (a2), zero -; RV32-NEXT: vlse64.v v12, (a3), zero -; RV32-NEXT: vlse64.v v14, (a1), zero -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: addi a1, sp, 120 -; RV32-NEXT: addi a2, sp, 112 -; RV32-NEXT: addi a3, sp, 104 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vlse64.v v14, (a2), zero -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; 
RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 96 -; RV32-NEXT: addi a1, sp, 88 -; RV32-NEXT: addi a2, sp, 80 -; RV32-NEXT: addi a3, sp, 72 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vlse64.v v14, (a2), zero -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: addi a1, sp, 56 -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vlse64.v v14, (a2), zero -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add 
a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vlse64.v v12, (a1), zero -; RV32-NEXT: vlse64.v v14, (a2), zero -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v14 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vv v10, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vi v10, v8, 2 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vi v10, v8, 1 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vi v10, v8, 4 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vi v10, v8, 8 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: 
slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 16 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 64 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 128 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 256 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 512 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: li a0, 1024 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, a5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vmul.vv v10, v8, v10 
-; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t1 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t2 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t3 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t4 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, t6 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s0 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s1 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s2 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, 
vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s3 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s4 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s5 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s6 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s7 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s8 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s9 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vand.vx v10, v8, s10 -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: vmul.vv v12, v8, v6 -; RV32-NEXT: vmul.vv v14, v8, v4 -; RV32-NEXT: vmul.vv v16, v8, v2 -; RV32-NEXT: vmul.vv v18, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v20, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v20, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v22, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v22, v8, v22 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add 
a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v24, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v24, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v26, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v26, v8, v26 -; RV32-NEXT: vmul.vv v28, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v30, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v30, v8, v30 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v6, v8, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v4, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v4, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v2, v8, v2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v0, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, 
a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: 
slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, 
a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v10, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vi v8, v8, 0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, 
v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 
16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte 
Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vxor.vv v8, v8, v14 -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vxor.vv v8, v8, v18 -; RV32-NEXT: vxor.vv v8, v8, v20 -; RV32-NEXT: vxor.vv v8, v8, v22 -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vxor.vv v8, v8, v26 -; RV32-NEXT: vxor.vv v8, v8, v28 -; RV32-NEXT: vxor.vv v8, v8, v30 -; RV32-NEXT: vxor.vv v8, v8, v6 -; RV32-NEXT: vxor.vv v8, v8, v4 -; RV32-NEXT: vxor.vv v8, v8, v2 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi 
a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 
16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vx v10, v8, ra -; RV32-NEXT: vsll.vx v12, v8, ra -; RV32-NEXT: vsrl.vx v14, v8, s11 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vand.vx v14, v14, a4 -; RV32-NEXT: vsrl.vi v18, v8, 24 -; RV32-NEXT: vand.vx v20, v8, a6 -; RV32-NEXT: vand.vx v18, v18, a6 -; RV32-NEXT: vsll.vx v16, v16, s11 -; RV32-NEXT: vsrl.vi v22, v8, 8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v24, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v22, v22, v24 -; RV32-NEXT: vor.vv v10, v14, v10 -; RV32-NEXT: vor.vv v14, v22, v18 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vsll.vi v18, v20, 24 -; RV32-NEXT: vor.vv v8, v18, v8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v14, v10 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: 
vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vsrl.vi v10, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 352 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -224 -; RV64-NEXT: sd ra, 216(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 208(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 200(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 192(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 168(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 160(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 144(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 128(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 120(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: li s1, 40 -; RV64-NEXT: lui s3, 16 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vsrl.vi v14, v8, 24 -; RV64-NEXT: lui t6, 4080 -; RV64-NEXT: vsrl.vi v10, v8, 8 -; RV64-NEXT: li s2, 255 -; 
RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 209715 -; RV64-NEXT: lui t5, 349525 -; RV64-NEXT: li t4, 16 -; RV64-NEXT: li t3, 32 -; RV64-NEXT: li t2, 64 -; RV64-NEXT: li t1, 128 -; RV64-NEXT: li t0, 256 -; RV64-NEXT: li a4, 512 -; RV64-NEXT: li a3, 1024 -; RV64-NEXT: li s0, 1 -; RV64-NEXT: lui a2, 1 -; RV64-NEXT: lui a1, 2 -; RV64-NEXT: lui a0, 4 -; RV64-NEXT: vsrl.vx v12, v8, a7 -; RV64-NEXT: vsrl.vx v18, v8, s1 -; RV64-NEXT: addi s4, s3, -256 -; RV64-NEXT: vand.vx v16, v14, t6 -; RV64-NEXT: slli s2, s2, 24 -; RV64-NEXT: vand.vx v20, v8, t6 -; RV64-NEXT: vsll.vx v14, v8, a7 -; RV64-NEXT: addi a7, a5, -241 -; RV64-NEXT: addi a6, a6, 819 -; RV64-NEXT: addi a5, t5, 1365 -; RV64-NEXT: slli t5, s0, 11 -; RV64-NEXT: slli t6, s0, 31 -; RV64-NEXT: sd t6, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: slli t6, s0, 32 -; RV64-NEXT: sd t6, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: slli t6, s0, 33 -; RV64-NEXT: sd t6, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: slli t6, s0, 34 -; RV64-NEXT: sd t6, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: slli t6, s0, 35 -; RV64-NEXT: sd t6, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: slli t6, s0, 36 -; RV64-NEXT: sd t6, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: slli t6, a7, 32 -; RV64-NEXT: add a7, a7, t6 -; RV64-NEXT: slli t6, a6, 32 -; RV64-NEXT: add a6, a6, t6 -; RV64-NEXT: slli t6, a5, 32 -; RV64-NEXT: add a5, a5, t6 -; RV64-NEXT: slli t6, s0, 37 -; RV64-NEXT: sd t6, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v18, v18, s4 -; RV64-NEXT: vand.vx v10, v10, s2 -; RV64-NEXT: vsll.vi v20, v20, 24 -; RV64-NEXT: vand.vx v22, v8, s2 -; RV64-NEXT: vand.vx v8, v8, s4 -; RV64-NEXT: vor.vv v12, v18, v12 -; RV64-NEXT: vor.vv v10, v10, v16 -; RV64-NEXT: vsll.vi v16, v22, 8 -; RV64-NEXT: vsll.vx v8, v8, s1 -; RV64-NEXT: vor.vv v10, v10, v12 -; RV64-NEXT: vor.vv v12, v20, v16 -; RV64-NEXT: vor.vv v8, v14, v8 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v10, v10, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v10, v10, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v10, v10, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vand.vx v10, v8, t4 -; RV64-NEXT: slli t4, s0, 38 -; RV64-NEXT: sd t4, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t3 -; RV64-NEXT: slli t3, s0, 39 -; RV64-NEXT: sd t3, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v14, v8, t2 -; RV64-NEXT: slli t2, s0, 40 -; RV64-NEXT: sd t2, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v16, v8, t1 -; RV64-NEXT: slli t1, s0, 41 -; RV64-NEXT: sd t1, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: vand.vx v18, v8, t0 -; RV64-NEXT: slli s6, s0, 42 -; RV64-NEXT: vand.vx v20, v8, a4 -; RV64-NEXT: slli s7, s0, 43 -; RV64-NEXT: vand.vx v22, v8, a3 -; RV64-NEXT: slli s8, s0, 44 -; RV64-NEXT: vand.vx v24, v8, t5 -; RV64-NEXT: slli s9, s0, 45 -; RV64-NEXT: vand.vx v26, v8, a2 -; RV64-NEXT: slli s10, s0, 46 -; RV64-NEXT: vand.vx v28, v8, a1 -; RV64-NEXT: slli s11, s0, 47 -; RV64-NEXT: vand.vx v30, v8, a0 -; RV64-NEXT: slli ra, s0, 48 -; RV64-NEXT: slli s3, s0, 49 -; RV64-NEXT: slli s1, s0, 50 -; RV64-NEXT: slli t6, s0, 51 -; RV64-NEXT: slli t5, s0, 52 -; RV64-NEXT: slli t4, s0, 53 -; RV64-NEXT: slli t3, s0, 54 -; RV64-NEXT: slli t2, s0, 55 -; RV64-NEXT: 
slli t1, s0, 56 -; RV64-NEXT: slli t0, s0, 57 -; RV64-NEXT: slli a4, s0, 58 -; RV64-NEXT: slli a3, s0, 59 -; RV64-NEXT: slli a2, s0, 60 -; RV64-NEXT: slli a1, s0, 61 -; RV64-NEXT: slli s0, s0, 62 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vand.vi v6, v8, 2 -; RV64-NEXT: vand.vi v4, v8, 1 -; RV64-NEXT: vand.vi v2, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v6, v8, v6 -; RV64-NEXT: sd a5, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v4 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v2 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v6, v8, v0 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v6, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v14 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v18 -; RV64-NEXT: csrr a5, 
vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v20 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v22 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v24 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v26 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v28 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vmul.vv v10, v8, v30 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 8 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 16 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 32 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 6 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 64 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: 
slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 128 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 256 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 512 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 1024 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 2048 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 8192 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 16384 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; 
RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 32768 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 65536 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 131072 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: lui s5, 262144 -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, 
v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 112 -; RV64-NEXT: vs2r.v v10, (a5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: mv a5, s5 -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: ld a5, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: ld s5, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v10, v8, s5 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s6 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s7 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s8 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr 
s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s9 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s10 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s11 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, ra -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 112 -; RV64-NEXT: vs2r.v v10, (s5) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s3 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: addi s3, sp, 112 -; RV64-NEXT: vs2r.v v10, (s3) # vscale x 16-byte Folded Spill -; RV64-NEXT: vand.vx v10, v8, s1 -; RV64-NEXT: vmul.vv v4, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t6 -; RV64-NEXT: vmul.vv v6, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t5 -; RV64-NEXT: vmul.vv v30, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t4 -; RV64-NEXT: vmul.vv v28, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t3 -; RV64-NEXT: vmul.vv v26, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t2 -; RV64-NEXT: vmul.vv v24, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t1 -; RV64-NEXT: vmul.vv v22, v8, v10 -; RV64-NEXT: vand.vx v10, v8, t0 -; RV64-NEXT: vmul.vv v20, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a4 -; RV64-NEXT: vmul.vv v18, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a3 -; RV64-NEXT: vmul.vv v16, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a2 -; RV64-NEXT: vmul.vv v14, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vmul.vv v12, v8, v10 -; RV64-NEXT: vand.vx v10, v8, s0 -; RV64-NEXT: vmul.vv v10, v8, v10 -; RV64-NEXT: vand.vx v0, v8, a0 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v0, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v2, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: 
csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; 
RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; 
RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; 
RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; 
RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v2 -; RV64-NEXT: addi a0, sp, 112 -; RV64-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload -; RV64-NEXT: vxor.vv v2, v0, v2 -; RV64-NEXT: vxor.vv v4, v2, v4 -; RV64-NEXT: vxor.vv v6, v4, v6 -; RV64-NEXT: vxor.vv v30, v6, v30 -; RV64-NEXT: vxor.vv v28, v30, v28 -; RV64-NEXT: vxor.vv v26, v28, v26 -; RV64-NEXT: vxor.vv v24, v26, v24 -; RV64-NEXT: vxor.vv v22, v24, v22 -; RV64-NEXT: vxor.vv v20, v22, v20 -; RV64-NEXT: vxor.vv v18, v20, v18 -; RV64-NEXT: vxor.vv v16, v18, v16 -; RV64-NEXT: vxor.vv v14, v16, v14 -; RV64-NEXT: vxor.vv v12, v14, v12 -; RV64-NEXT: vxor.vv v10, v12, v10 -; RV64-NEXT: vxor.vv v8, v10, v8 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v10, v8, a0 -; RV64-NEXT: li a1, 40 -; RV64-NEXT: vsrl.vx v12, v8, a1 -; RV64-NEXT: vsrl.vi v14, v8, 24 -; RV64-NEXT: vsrl.vi v16, v8, 8 -; RV64-NEXT: vand.vx v12, v12, s4 -; RV64-NEXT: vor.vv v10, v12, v10 -; RV64-NEXT: vand.vx v12, v8, s2 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v14, v14, a2 -; RV64-NEXT: vand.vx v16, v16, s2 -; RV64-NEXT: vor.vv v14, v16, v14 -; RV64-NEXT: vand.vx v16, v8, a2 -; RV64-NEXT: vsll.vi v12, v12, 8 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vsll.vx v16, v8, a0 -; RV64-NEXT: vand.vx v8, v8, s4 -; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v16, v8 -; RV64-NEXT: vor.vv v10, v14, v10 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vsrl.vi v10, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v10, v10, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v10, v10, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v10, v10, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v10, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 200(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 192(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 168(sp) # 8-byte 
Folded Reload -; RV64-NEXT: ld s6, 160(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 128(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 120(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 224 -; RV64-NEXT: ret - %a = call <4 x i64> @llvm.clmulr.v4i64(<4 x i64> %x, <4 x i64> %y) - ret <4 x i64> %a -} - -define <8 x i64> @clmulr_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; RV32-LABEL: clmulr_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -352 -; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: lui s11, 1044480 -; RV32-NEXT: lui s0, 524288 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: li ra, 2 -; RV32-NEXT: li t4, 4 -; RV32-NEXT: li t2, 8 -; RV32-NEXT: li t6, 16 -; RV32-NEXT: li t5, 32 -; RV32-NEXT: li t3, 64 -; RV32-NEXT: li t1, 128 -; RV32-NEXT: li t0, 256 -; RV32-NEXT: li a7, 512 -; RV32-NEXT: li a6, 1024 -; RV32-NEXT: lui a4, 1 -; RV32-NEXT: lui a3, 2 -; RV32-NEXT: lui a2, 4 -; RV32-NEXT: lui a5, 8 -; RV32-NEXT: lui s1, 16 -; RV32-NEXT: lui a1, 32 -; RV32-NEXT: lui s2, 64 -; RV32-NEXT: lui s3, 128 -; RV32-NEXT: lui s4, 256 -; RV32-NEXT: lui s5, 512 -; RV32-NEXT: lui s6, 1024 -; RV32-NEXT: lui s7, 2048 -; RV32-NEXT: lui s8, 4096 -; RV32-NEXT: lui s9, 8192 -; RV32-NEXT: lui s10, 16384 -; RV32-NEXT: sw s11, 272(sp) -; RV32-NEXT: lui s11, 32768 -; RV32-NEXT: sw zero, 276(sp) -; RV32-NEXT: sw s0, 264(sp) -; RV32-NEXT: sw zero, 268(sp) -; RV32-NEXT: sw zero, 256(sp) -; RV32-NEXT: sw a0, 260(sp) -; RV32-NEXT: sw zero, 248(sp) -; RV32-NEXT: sw ra, 252(sp) -; RV32-NEXT: lui ra, 65536 -; RV32-NEXT: sw zero, 240(sp) -; RV32-NEXT: sw t4, 244(sp) -; RV32-NEXT: lui t4, 131072 -; RV32-NEXT: sw zero, 232(sp) -; RV32-NEXT: sw t2, 236(sp) -; RV32-NEXT: lui t2, 262144 -; RV32-NEXT: sw zero, 224(sp) -; RV32-NEXT: sw t6, 228(sp) -; RV32-NEXT: sw zero, 216(sp) -; RV32-NEXT: sw t5, 220(sp) -; RV32-NEXT: sw zero, 208(sp) -; RV32-NEXT: sw t3, 212(sp) -; RV32-NEXT: sw zero, 200(sp) -; RV32-NEXT: sw t1, 204(sp) -; RV32-NEXT: sw zero, 192(sp) -; RV32-NEXT: sw t0, 196(sp) -; RV32-NEXT: sw zero, 184(sp) -; RV32-NEXT: sw a7, 188(sp) -; RV32-NEXT: sw zero, 176(sp) -; RV32-NEXT: sw a6, 180(sp) -; RV32-NEXT: li t1, 1024 -; RV32-NEXT: slli t6, a0, 11 -; RV32-NEXT: sw zero, 168(sp) -; RV32-NEXT: sw t6, 172(sp) -; RV32-NEXT: sw zero, 160(sp) -; RV32-NEXT: sw a4, 164(sp) -; RV32-NEXT: sw zero, 152(sp) -; RV32-NEXT: sw a3, 156(sp) -; RV32-NEXT: lui t3, 2 -; RV32-NEXT: sw zero, 144(sp) -; RV32-NEXT: sw a2, 148(sp) -; RV32-NEXT: lui t5, 4 -; RV32-NEXT: sw zero, 136(sp) -; RV32-NEXT: sw a5, 140(sp) -; RV32-NEXT: lui a4, 8 -; RV32-NEXT: sw zero, 128(sp) -; RV32-NEXT: sw s1, 132(sp) -; 
RV32-NEXT: sw zero, 120(sp) -; RV32-NEXT: sw a1, 124(sp) -; RV32-NEXT: sw zero, 112(sp) -; RV32-NEXT: sw s2, 116(sp) -; RV32-NEXT: sw zero, 104(sp) -; RV32-NEXT: sw s3, 108(sp) -; RV32-NEXT: sw zero, 96(sp) -; RV32-NEXT: sw s4, 100(sp) -; RV32-NEXT: sw zero, 88(sp) -; RV32-NEXT: sw s5, 92(sp) -; RV32-NEXT: sw zero, 80(sp) -; RV32-NEXT: sw s6, 84(sp) -; RV32-NEXT: sw zero, 72(sp) -; RV32-NEXT: sw s7, 76(sp) -; RV32-NEXT: sw zero, 64(sp) -; RV32-NEXT: sw s8, 68(sp) -; RV32-NEXT: sw zero, 56(sp) -; RV32-NEXT: sw s9, 60(sp) -; RV32-NEXT: sw zero, 48(sp) -; RV32-NEXT: sw s10, 52(sp) -; RV32-NEXT: sw zero, 40(sp) -; RV32-NEXT: sw s11, 44(sp) -; RV32-NEXT: sw zero, 32(sp) -; RV32-NEXT: sw ra, 36(sp) -; RV32-NEXT: sw zero, 24(sp) -; RV32-NEXT: sw t4, 28(sp) -; RV32-NEXT: sw zero, 16(sp) -; RV32-NEXT: sw t2, 20(sp) -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: sw s0, 12(sp) -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v28, a1 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vmv.v.x v4, a1 -; RV32-NEXT: addi a1, sp, 272 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 8 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v0, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li t0, 56 -; RV32-NEXT: vsrl.vi v20, v8, 24 -; RV32-NEXT: vsrl.vx v12, v8, t0 -; RV32-NEXT: li a6, 40 -; RV32-NEXT: vsrl.vx v16, v8, a6 -; RV32-NEXT: vsll.vx v24, v8, t0 -; RV32-NEXT: addi a3, s1, -256 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v16, v16, v12 -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vsll.vx v12, v12, a6 -; RV32-NEXT: vor.vv v12, v24, v12 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: vand.vx v20, v20, a5 -; RV32-NEXT: lui a7, 349525 -; RV32-NEXT: addi a7, a7, 1365 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v20, v24, v20 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v24, a7 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vand.vx v20, v8, a5 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v20, v8 -; RV32-NEXT: addi a7, sp, 264 -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi a7, sp, 256 -; RV32-NEXT: vlse64.v v12, (a7), zero -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v28 -; RV32-NEXT: vand.vv v16, v16, v28 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, 
a0, 288 -; RV32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v4 -; RV32-NEXT: vand.vv v16, v16, v4 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v24, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: addi a7, sp, 248 -; RV32-NEXT: vlse64.v v16, (a7), zero -; RV32-NEXT: vand.vv v28, v8, v20 -; RV32-NEXT: addi a7, sp, 240 -; RV32-NEXT: addi a0, sp, 232 -; RV32-NEXT: vlse64.v v20, (a7), zero -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vand.vv v4, v8, v12 -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a2, sp, 224 -; RV32-NEXT: addi a7, sp, 216 -; RV32-NEXT: addi a1, sp, 208 -; RV32-NEXT: addi a0, sp, 200 -; RV32-NEXT: vlse64.v v12, (a2), zero -; RV32-NEXT: vlse64.v v16, (a7), zero -; RV32-NEXT: vlse64.v v20, (a1), zero -; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv 
v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 192 -; RV32-NEXT: addi a1, sp, 184 -; RV32-NEXT: addi a2, sp, 176 -; RV32-NEXT: addi a7, sp, 168 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a2), zero -; RV32-NEXT: vlse64.v v24, (a7), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 160 -; RV32-NEXT: addi a1, sp, 152 -; RV32-NEXT: addi a2, sp, 144 -; RV32-NEXT: addi a7, sp, 136 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a2), zero -; RV32-NEXT: vlse64.v v24, (a7), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 
-; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 128 -; RV32-NEXT: addi a1, sp, 120 -; RV32-NEXT: addi a2, sp, 112 -; RV32-NEXT: addi a7, sp, 104 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a2), zero -; RV32-NEXT: vlse64.v v24, (a7), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 96 -; RV32-NEXT: addi a1, sp, 88 -; RV32-NEXT: addi a2, sp, 80 -; RV32-NEXT: addi a7, sp, 72 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a2), zero -; RV32-NEXT: vlse64.v v24, (a7), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; 
RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 64 -; RV32-NEXT: addi a1, sp, 56 -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: addi a7, sp, 40 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a2), zero -; RV32-NEXT: vlse64.v v24, (a7), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: addi a7, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: vlse64.v v20, (a2), zero -; RV32-NEXT: vlse64.v v24, (a7), zero -; RV32-NEXT: vand.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v20 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vv v12, v8, v24 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, 
a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vi v12, v8, 2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vi v12, v8, 1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vi v12, v8, 4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vi v12, v8, 8 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 16 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 64 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 128 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: 
vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 256 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: li a0, 512 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: lui a0, 1 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s1 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: lui a0, 32 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 
-; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s3 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s5 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s6 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s7 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s8 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s9 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s10 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, s11 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, ra -; 
RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t4 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vand.vx v12, v8, t2 -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: vmul.vv v16, v8, v28 -; RV32-NEXT: vmul.vv v20, v8, v4 -; RV32-NEXT: vmul.vv v24, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v28, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v28, v8, v28 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v4, v8, v4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v0, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; 
RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, 
a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; 
RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, 
a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 7 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v12, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 
2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vi v8, v8, 0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; 
RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; 
RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: addi a0, sp, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vxor.vv v8, v8, v20 -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vxor.vv v8, v8, v28 -; RV32-NEXT: vxor.vv v8, v8, v4 -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add 
a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte 
Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; 
RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v12, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vx v12, v8, t0 -; RV32-NEXT: vsrl.vx v16, v8, a6 -; RV32-NEXT: vsrl.vi v20, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vx v20, v20, a5 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 8 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v24, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vor.vv v16, v16, v20 -; RV32-NEXT: vand.vx v20, v8, a5 -; RV32-NEXT: vand.vv v24, v8, v24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vsll.vx v24, v8, t0 -; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vsll.vx v8, v8, a6 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vor.vv v8, v8, v20 -; RV32-NEXT: 
vor.vv v8, v8, v12 -; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vsll.vi v8, v8, 4 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vsll.vi v8, v8, 2 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a1, a1, a0 -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 288 -; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: slli a0, a0, 6 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 352 -; RV32-NEXT: ret -; -; RV64-LABEL: clmulr_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -240 -; RV64-NEXT: sd ra, 232(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 224(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 216(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 208(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 200(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s4, 192(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s5, 184(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s6, 176(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s7, 168(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s8, 160(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s9, 152(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s10, 144(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s11, 136(sp) # 8-byte Folded Spill -; RV64-NEXT: 
csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: li a6, 56 -; RV64-NEXT: li t0, 40 -; RV64-NEXT: lui t1, 16 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vsrl.vi v20, v8, 24 -; RV64-NEXT: lui a7, 4080 -; RV64-NEXT: vsrl.vi v12, v8, 8 -; RV64-NEXT: li s0, 255 -; RV64-NEXT: lui a3, 61681 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: lui a5, 349525 -; RV64-NEXT: li a2, 16 -; RV64-NEXT: li a1, 32 -; RV64-NEXT: li a0, 64 -; RV64-NEXT: li s9, 1 -; RV64-NEXT: vsrl.vx v16, v8, a6 -; RV64-NEXT: vsrl.vx v28, v8, t0 -; RV64-NEXT: addi s4, t1, -256 -; RV64-NEXT: vand.vx v24, v20, a7 -; RV64-NEXT: slli s0, s0, 24 -; RV64-NEXT: vand.vx v4, v8, a7 -; RV64-NEXT: vsll.vx v20, v8, a6 -; RV64-NEXT: addi a7, a3, -241 -; RV64-NEXT: addi a6, a4, 819 -; RV64-NEXT: addi a5, a5, 1365 -; RV64-NEXT: slli a3, s9, 11 -; RV64-NEXT: sd a3, 112(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 31 -; RV64-NEXT: sd a3, 104(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 32 -; RV64-NEXT: sd a3, 96(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 33 -; RV64-NEXT: sd a3, 88(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 34 -; RV64-NEXT: sd a3, 80(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 35 -; RV64-NEXT: sd a3, 72(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 36 -; RV64-NEXT: sd a3, 64(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 37 -; RV64-NEXT: sd a3, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 38 -; RV64-NEXT: sd a3, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 39 -; RV64-NEXT: sd a3, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 40 -; RV64-NEXT: sd a3, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: slli a3, s9, 41 -; RV64-NEXT: sd a3, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: slli s6, s9, 42 -; RV64-NEXT: slli s7, s9, 43 -; RV64-NEXT: slli a3, a7, 32 -; RV64-NEXT: add a7, a7, a3 -; RV64-NEXT: slli a3, a6, 32 -; RV64-NEXT: add a6, a6, a3 -; RV64-NEXT: slli a3, a5, 32 -; RV64-NEXT: add a5, a5, a3 -; RV64-NEXT: slli s8, s9, 44 -; RV64-NEXT: vand.vx v28, v28, s4 -; RV64-NEXT: vand.vx v12, v12, s0 -; RV64-NEXT: vsll.vi v4, v4, 24 -; RV64-NEXT: vand.vx v0, v8, s0 -; RV64-NEXT: vand.vx v8, v8, s4 -; RV64-NEXT: vor.vv v16, v28, v16 -; RV64-NEXT: vor.vv v12, v12, v24 -; RV64-NEXT: vsll.vi v24, v0, 8 -; RV64-NEXT: vsll.vx v8, v8, t0 -; RV64-NEXT: vor.vv v12, v12, v16 -; RV64-NEXT: vor.vv v16, v4, v24 -; RV64-NEXT: vor.vv v8, v20, v8 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v12, v12, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v12, v12, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v12, v12, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vand.vx v12, v8, a2 -; RV64-NEXT: slli s10, s9, 45 -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: slli s11, s9, 46 -; RV64-NEXT: vand.vx v20, v8, a0 -; RV64-NEXT: slli ra, s9, 47 -; RV64-NEXT: slli s3, s9, 48 -; RV64-NEXT: slli s2, s9, 49 -; RV64-NEXT: slli s1, s9, 50 -; RV64-NEXT: slli t6, s9, 51 -; RV64-NEXT: slli 
t5, s9, 52 -; RV64-NEXT: slli t4, s9, 53 -; RV64-NEXT: slli t3, s9, 54 -; RV64-NEXT: slli t2, s9, 55 -; RV64-NEXT: slli t1, s9, 56 -; RV64-NEXT: slli t0, s9, 57 -; RV64-NEXT: slli a4, s9, 58 -; RV64-NEXT: slli a3, s9, 59 -; RV64-NEXT: slli a2, s9, 60 -; RV64-NEXT: slli a1, s9, 61 -; RV64-NEXT: slli s9, s9, 62 -; RV64-NEXT: li a0, -1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vand.vi v24, v8, 2 -; RV64-NEXT: vand.vi v28, v8, 1 -; RV64-NEXT: vand.vi v4, v8, 4 -; RV64-NEXT: vand.vi v0, v8, 8 -; RV64-NEXT: vmul.vv v24, v8, v24 -; RV64-NEXT: sd a5, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v28 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v4 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v24, v8, v0 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v24, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v16 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vmul.vv v12, v8, v20 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: li s5, 128 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, 
a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: li s5, 256 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: li s5, 512 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 6 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: li s5, 1024 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 112(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 1 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 2 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 4 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 8 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 
-; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 16 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 32 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 64 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 128 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 256 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 512 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 1024 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 2048 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 4096 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: 
vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 8192 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 7 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 16384 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 32768 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 65536 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 131072 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: lui s5, 262144 -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 104(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 96(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli 
a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 88(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 5 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 80(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 72(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 64(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 4 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 2 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 1 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr a5, vlenb -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: mv s5, a5 -; RV64-NEXT: slli a5, a5, 3 -; RV64-NEXT: add a5, a5, s5 -; RV64-NEXT: add a5, sp, a5 -; RV64-NEXT: addi a5, a5, 128 -; RV64-NEXT: vs4r.v v12, (a5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv a5, s5 -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: add s5, s5, a5 -; RV64-NEXT: ld a5, 8(sp) # 8-byte Folded Reload 
-; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: vand.vx v12, v8, s5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s7 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s8 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s10 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 4 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s11 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 1 -; RV64-NEXT: add s6, s6, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, ra -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s5, vlenb -; RV64-NEXT: slli s5, s5, 3 -; RV64-NEXT: mv s6, s5 -; RV64-NEXT: slli s5, s5, 2 -; RV64-NEXT: add s5, s5, s6 -; RV64-NEXT: add s5, sp, s5 -; RV64-NEXT: addi s5, s5, 128 -; RV64-NEXT: vs4r.v v12, (s5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s3, vlenb -; RV64-NEXT: slli s3, s3, 2 -; RV64-NEXT: mv s5, s3 -; RV64-NEXT: slli s3, s3, 3 -; RV64-NEXT: add s3, s3, s5 -; RV64-NEXT: add s3, sp, s3 -; RV64-NEXT: addi s3, s3, 128 -; RV64-NEXT: vs4r.v v12, (s3) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s2, vlenb -; RV64-NEXT: slli s2, s2, 5 -; RV64-NEXT: add s2, sp, s2 -; RV64-NEXT: addi s2, s2, 128 -; RV64-NEXT: vs4r.v v12, (s2) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, s1 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr s1, vlenb -; RV64-NEXT: slli s1, s1, 2 -; RV64-NEXT: mv s2, s1 -; RV64-NEXT: slli s1, s1, 1 -; RV64-NEXT: add s2, s2, s1 -; RV64-NEXT: slli s1, s1, 1 -; RV64-NEXT: add s1, s1, 
s2 -; RV64-NEXT: add s1, sp, s1 -; RV64-NEXT: addi s1, s1, 128 -; RV64-NEXT: vs4r.v v12, (s1) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t6 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t6, vlenb -; RV64-NEXT: slli t6, t6, 3 -; RV64-NEXT: mv s1, t6 -; RV64-NEXT: slli t6, t6, 1 -; RV64-NEXT: add t6, t6, s1 -; RV64-NEXT: add t6, sp, t6 -; RV64-NEXT: addi t6, t6, 128 -; RV64-NEXT: vs4r.v v12, (t6) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t5 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t5, vlenb -; RV64-NEXT: slli t5, t5, 2 -; RV64-NEXT: mv t6, t5 -; RV64-NEXT: slli t5, t5, 2 -; RV64-NEXT: add t5, t5, t6 -; RV64-NEXT: add t5, sp, t5 -; RV64-NEXT: addi t5, t5, 128 -; RV64-NEXT: vs4r.v v12, (t5) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t4 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t4, vlenb -; RV64-NEXT: slli t4, t4, 4 -; RV64-NEXT: add t4, sp, t4 -; RV64-NEXT: addi t4, t4, 128 -; RV64-NEXT: vs4r.v v12, (t4) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t3 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t3, vlenb -; RV64-NEXT: slli t3, t3, 2 -; RV64-NEXT: mv t4, t3 -; RV64-NEXT: slli t3, t3, 1 -; RV64-NEXT: add t3, t3, t4 -; RV64-NEXT: add t3, sp, t3 -; RV64-NEXT: addi t3, t3, 128 -; RV64-NEXT: vs4r.v v12, (t3) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t2 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t2, vlenb -; RV64-NEXT: slli t2, t2, 3 -; RV64-NEXT: add t2, sp, t2 -; RV64-NEXT: addi t2, t2, 128 -; RV64-NEXT: vs4r.v v12, (t2) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t1 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: csrr t1, vlenb -; RV64-NEXT: slli t1, t1, 2 -; RV64-NEXT: add t1, sp, t1 -; RV64-NEXT: addi t1, t1, 128 -; RV64-NEXT: vs4r.v v12, (t1) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, t0 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: addi t0, sp, 128 -; RV64-NEXT: vs4r.v v12, (t0) # vscale x 32-byte Folded Spill -; RV64-NEXT: vand.vx v12, v8, a4 -; RV64-NEXT: vmul.vv v28, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a3 -; RV64-NEXT: vmul.vv v24, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a2 -; RV64-NEXT: vmul.vv v20, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 -; RV64-NEXT: vmul.vv v16, v8, v12 -; RV64-NEXT: vand.vx v12, v8, s9 -; RV64-NEXT: vmul.vv v12, v8, v12 -; RV64-NEXT: vand.vx v0, v8, a0 -; RV64-NEXT: vmul.vv v8, v8, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v4, v0 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 
-; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: 
vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # 
vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 7 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; 
RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 6 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; 
RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 5 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli 
a0, a0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v0, v0, v4 -; RV64-NEXT: addi a0, sp, 128 -; RV64-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload -; RV64-NEXT: vxor.vv v4, v0, v4 -; RV64-NEXT: vxor.vv v28, v4, v28 -; RV64-NEXT: vxor.vv v24, v28, v24 -; RV64-NEXT: vxor.vv v20, v24, v20 -; RV64-NEXT: vxor.vv v16, v20, v16 -; RV64-NEXT: vxor.vv v12, v16, v12 -; RV64-NEXT: vxor.vv v8, v12, v8 -; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsrl.vx v12, v8, a0 -; RV64-NEXT: li a1, 40 -; RV64-NEXT: vsrl.vx v16, v8, a1 -; RV64-NEXT: vsrl.vi v20, v8, 24 -; RV64-NEXT: vsrl.vi v24, v8, 8 -; RV64-NEXT: vand.vx v16, v16, s4 -; RV64-NEXT: vor.vv v12, v16, v12 -; RV64-NEXT: vand.vx v16, v8, s0 -; RV64-NEXT: lui a2, 4080 -; RV64-NEXT: vand.vx v20, v20, a2 -; RV64-NEXT: vand.vx v24, v24, s0 -; RV64-NEXT: vor.vv v20, v24, v20 -; RV64-NEXT: vand.vx v24, v8, a2 -; RV64-NEXT: vsll.vi v16, v16, 8 -; RV64-NEXT: vsll.vi v24, v24, 24 -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vsll.vx v24, v8, a0 -; RV64-NEXT: vand.vx v8, v8, s4 -; RV64-NEXT: vsll.vx v8, v8, a1 -; RV64-NEXT: vor.vv v8, v24, v8 -; RV64-NEXT: vor.vv v12, v20, v12 -; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vor.vv v8, v8, v12 -; RV64-NEXT: vsrl.vi v12, v8, 4 -; RV64-NEXT: vand.vx v8, v8, a7 -; RV64-NEXT: vand.vx v12, v12, a7 -; RV64-NEXT: vsll.vi v8, v8, 4 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a6 -; RV64-NEXT: vand.vx v12, v12, a6 -; RV64-NEXT: vsll.vi v8, v8, 2 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v8, v8, a5 -; RV64-NEXT: vand.vx v12, v12, a5 -; RV64-NEXT: vadd.vv v8, v8, v8 -; RV64-NEXT: vor.vv v8, v12, v8 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: slli a0, a0, 2 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a1, a1, a0 -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: ld ra, 232(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 224(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 216(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 208(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 200(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s4, 192(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s5, 184(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s6, 176(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s7, 168(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s8, 160(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s9, 152(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s10, 144(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s11, 136(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 240 -; RV64-NEXT: ret - %a = call <8 x i64> @llvm.clmulr.v8i64(<8 x i64> %x, <8 x i64> %y) - ret <8 x i64> %a -} From 22cdaedfca60f09841b34c93ddd26fc19118b546 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 10:28:07 +0000 Subject: 
[PATCH 04/13] [ISel] DAGCombine clmul -> clmul[hr]

---
 llvm/include/llvm/CodeGen/SDPatternMatch.h    |    5 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   26 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   25 +-
 llvm/test/CodeGen/RISCV/clmul.ll              | 8265 +++++++++++++++++
 4 files changed, 8314 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index a81b91e338cb8..445e025861fd2 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -919,6 +919,11 @@ inline BinaryOpc_match<LHS, RHS> m_Rotr(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(ISD::ROTR, L, R);
 }
 
+template <typename LHS, typename RHS>
+inline BinaryOpc_match<LHS, RHS> m_Clmul(const LHS &L, const RHS &R) {
+  return BinaryOpc_match<LHS, RHS>(ISD::CLMUL, L, R);
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOpc_match<LHS, RHS> m_FAdd(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(ISD::FADD, L, R);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 94afdc5db6613..906f4b2a8ec39 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10315,6 +10315,25 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
   if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
     return R;
 
+  // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y).
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue X, Y;
+  if (sd_match(N, m_Srl(m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))),
+                        m_SpecificInt(VT.getScalarSizeInBits() / 2 - 1))))
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
+                       DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y));
+  if (sd_match(N, m_Srl(m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))),
+                        m_SpecificInt(VT.getScalarSizeInBits() / 2))))
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
+                       DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y));
+
+  // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 -> clmulh(x, y).
+  if (sd_match(N, m_Srl(m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)),
+                                             m_BitReverse(m_Value(Y)))),
+                        m_SpecificInt(1))))
+    return DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
+
   // We want to pull some binops through shifts, so that we have (and (shift))
   // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
   // thing happens with address calculations, so it's important to canonicalize
@@ -10350,8 +10369,6 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
     return SDValue();
 
   // Attempt to fold the constants, shifting the binop RHS by the shift amount.
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
   if (SDValue NewRHS = DAG.FoldConstantArithmetic(
           N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
     SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
@@ -11771,6 +11788,11 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
       sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
     return DAG.getNode(ISD::SRL, DL, VT, X, Y);
 
+  // fold bitreverse(clmul(bitreverse(x), bitreverse(y))) -> clmulr(x, y)
+  if (sd_match(N, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)),
+                                       m_BitReverse(m_Value(Y))))))
+    return DAG.getNode(ISD::CLMULR, DL, VT, X, Y);
+
   return SDValue();
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 79627466bad0d..0014bcf60c0e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8308,13 +8308,14 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
   SDValue X = Node->getOperand(0);
   SDValue Y = Node->getOperand(1);
   unsigned BW = VT.getScalarSizeInBits();
+  unsigned Opcode = Node->getOpcode();
 
-  if (VT.isVector() && isOperationLegalOrCustomOrPromote(
-                           Node->getOpcode(), VT.getVectorElementType()))
+  if (VT.isVector() &&
+      isOperationLegalOrCustomOrPromote(Opcode, VT.getVectorElementType()))
     return DAG.UnrollVectorOp(Node);
 
   SDValue Res = DAG.getConstant(0, DL, VT);
-  switch (Node->getOpcode()) {
+  switch (Opcode) {
   case ISD::CLMUL: {
     for (unsigned I = 0; I < BW; ++I) {
       SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
@@ -8327,12 +8328,26 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
   case ISD::CLMULR:
   case ISD::CLMULH: {
     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 2 * BW);
+    // For example, ExtVT = i64 based operations aren't legal on rv32; use
+    // bitreverse-based lowering in this case.
+    if (!isOperationLegalOrCustom(ISD::ZERO_EXTEND, ExtVT) ||
+        !isOperationLegalOrCustom(ISD::SRL, ExtVT)) {
+      SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
+      SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
+      SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
+      Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
+      Res = Opcode == ISD::CLMULR
+                ? Res
+                : DAG.getNode(ISD::SRL, DL, VT, Res,
+                              DAG.getShiftAmountConstant(1, VT, DL));
+      break;
+    }
     SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
     SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
     SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
-    unsigned ShtAmt = Node->getOpcode() == ISD::CLMULR ? BW - 1 : BW;
+    unsigned ShtAmt = Opcode == ISD::CLMULR ?
BW - 1 : BW; SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul, - DAG.getShiftAmountConstant(ShtAmt, VT, DL)); + DAG.getShiftAmountConstant(ShtAmt, ExtVT, DL)); Res = DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits); break; } diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll index da4f4d3075133..429d34a0f9851 100644 --- a/llvm/test/CodeGen/RISCV/clmul.ll +++ b/llvm/test/CodeGen/RISCV/clmul.ll @@ -3241,3 +3241,8268 @@ define i16 @clmul_constfold_i16() nounwind { %res = call i16 @llvm.clmul.i16(i16 -2, i16 -1) ret i16 %res } + +define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { +; RV32IM-LABEL: clmulr_i4: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui a3, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui t2, 61681 +; RV32IM-NEXT: lui t3, 209715 +; RV32IM-NEXT: lui a7, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli t5, a1, 24 +; RV32IM-NEXT: slli a4, a1, 24 +; RV32IM-NEXT: li t6, 1 +; RV32IM-NEXT: lui s0, 4 +; RV32IM-NEXT: lui s1, 8 +; RV32IM-NEXT: lui s2, 32 +; RV32IM-NEXT: lui s3, 64 +; RV32IM-NEXT: lui s5, 128 +; RV32IM-NEXT: lui s6, 256 +; RV32IM-NEXT: lui s7, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: lui s11, 8192 +; RV32IM-NEXT: lui ra, 16384 +; RV32IM-NEXT: addi s4, a3, -256 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: and t0, t0, s4 +; RV32IM-NEXT: or a3, t0, t1 +; RV32IM-NEXT: lui t0, 32768 +; RV32IM-NEXT: and t1, t4, s4 +; RV32IM-NEXT: or t4, t1, t5 +; RV32IM-NEXT: lui a6, 65536 +; RV32IM-NEXT: and a0, a0, s4 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or t5, a2, a0 +; RV32IM-NEXT: lui a2, 131072 +; RV32IM-NEXT: and a1, a1, s4 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, a4, a1 +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: addi t2, t2, -241 +; RV32IM-NEXT: addi t3, t3, 819 +; RV32IM-NEXT: addi a7, a7, 1365 +; RV32IM-NEXT: or a3, t5, a3 +; RV32IM-NEXT: or a0, a0, t4 +; RV32IM-NEXT: srli t4, a3, 4 +; RV32IM-NEXT: and a3, a3, t2 +; RV32IM-NEXT: srli t5, a0, 4 +; RV32IM-NEXT: and a0, a0, t2 +; RV32IM-NEXT: and t4, t4, t2 +; RV32IM-NEXT: slli a3, a3, 4 +; RV32IM-NEXT: and t5, t5, t2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t4, a3, 2 +; RV32IM-NEXT: and a3, a3, t3 +; RV32IM-NEXT: srli t5, a0, 2 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a3, a3, 2 +; RV32IM-NEXT: and t5, t5, t3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t4, a3, 1 +; RV32IM-NEXT: and a3, a3, a7 +; RV32IM-NEXT: srli t5, a0, 1 +; RV32IM-NEXT: and a0, a0, a7 +; RV32IM-NEXT: and t4, t4, a7 +; RV32IM-NEXT: and a7, t5, a7 +; RV32IM-NEXT: lui a4, 524288 +; RV32IM-NEXT: 
slli t6, t6, 11 +; RV32IM-NEXT: slli a3, a3, 1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, a7, a0 +; RV32IM-NEXT: andi t5, a0, 2 +; RV32IM-NEXT: andi t4, a0, 1 +; RV32IM-NEXT: and t6, a0, t6 +; RV32IM-NEXT: lui a7, 1 +; RV32IM-NEXT: and a7, a0, a7 +; RV32IM-NEXT: sw a7, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a7, 2 +; RV32IM-NEXT: and a7, a0, a7 +; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: sw s0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s2, a0, s2 +; RV32IM-NEXT: and a5, a0, s3 +; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s5 +; RV32IM-NEXT: sw a5, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s6 +; RV32IM-NEXT: sw a5, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s7, a0, s7 +; RV32IM-NEXT: and s8, a0, s8 +; RV32IM-NEXT: and a5, a0, s9 +; RV32IM-NEXT: sw a5, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s10 +; RV32IM-NEXT: sw a5, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s11 +; RV32IM-NEXT: sw a5, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, ra +; RV32IM-NEXT: sw a5, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, t0 +; RV32IM-NEXT: sw a5, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a6 +; RV32IM-NEXT: sw a5, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, a4 +; RV32IM-NEXT: sw a4, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a0, 4 +; RV32IM-NEXT: andi a2, a0, 8 +; RV32IM-NEXT: andi a4, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a6, a0, 64 +; RV32IM-NEXT: andi a7, a0, 128 +; RV32IM-NEXT: andi t0, a0, 256 +; RV32IM-NEXT: andi t1, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul t5, a3, t5 +; RV32IM-NEXT: sw t5, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, a3, t4 +; RV32IM-NEXT: mul a1, a3, a1 +; RV32IM-NEXT: sw a1, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s6, a3, a2 +; RV32IM-NEXT: mul s5, a3, a4 +; RV32IM-NEXT: mul s3, a3, a5 +; RV32IM-NEXT: mul a1, a3, a6 +; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a3, a7 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s0, a3, t0 +; RV32IM-NEXT: mul t5, a3, t1 +; RV32IM-NEXT: mul s11, a3, a0 +; RV32IM-NEXT: mul a0, a3, t6 +; RV32IM-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a3, a0 +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a3, a0 +; RV32IM-NEXT: mul s1, a3, s1 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, a3, a0 +; RV32IM-NEXT: mul a0, a3, s2 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a3, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a3, a0 +; RV32IM-NEXT: mul a6, a3, s7 +; RV32IM-NEXT: mul t4, a3, s8 +; RV32IM-NEXT: lw a0, 
56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a3, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a3, a0 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: lw a5, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a3, a5 +; RV32IM-NEXT: lw t0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a3, t0 +; RV32IM-NEXT: lw t6, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a3, t6 +; RV32IM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a3, s2 +; RV32IM-NEXT: lw s8, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s8, a3, s8 +; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a3, s9 +; RV32IM-NEXT: lw s9, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s10, s10, s9 +; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s6, s9, s6 +; RV32IM-NEXT: xor s3, s5, s3 +; RV32IM-NEXT: xor t5, s0, t5 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, s10, s6 +; RV32IM-NEXT: lw a4, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, s3, a4 +; RV32IM-NEXT: xor t1, t5, s11 +; RV32IM-NEXT: xor a7, a7, s1 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, a1, a4 +; RV32IM-NEXT: lw a4, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, t1, a4 +; RV32IM-NEXT: xor a5, a7, ra +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a6 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s7 +; RV32IM-NEXT: xor a0, a0, t6 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s2 +; RV32IM-NEXT: xor a4, a1, a4 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a0, a0, s8 +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a0, a0, a3 +; RV32IM-NEXT: and a3, a2, s4 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s4 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, t2 +; RV32IM-NEXT: and a1, a1, t2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and a1, a1, t3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: andi a1, a0, 5 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: slli a1, a1, 1 +; RV32IM-NEXT: andi a0, a0, 5 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte 
Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i4: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -480 +; RV64IM-NEXT: sd ra, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a4, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li s4, 255 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s8, 16 +; RV64IM-NEXT: srli t1, a0, 56 +; RV64IM-NEXT: srliw t3, a0, 24 +; RV64IM-NEXT: slli t4, a0, 56 +; RV64IM-NEXT: lui s3, 61681 +; RV64IM-NEXT: lui t5, 209715 +; RV64IM-NEXT: lui s6, 349525 +; RV64IM-NEXT: srli s9, a1, 24 +; RV64IM-NEXT: srli s0, a1, 8 +; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srli t2, a1, 56 +; RV64IM-NEXT: srliw s11, a1, 24 +; RV64IM-NEXT: slli a3, a1, 56 +; RV64IM-NEXT: li t0, 1 +; RV64IM-NEXT: lui s1, 128 +; RV64IM-NEXT: lui s2, 256 +; RV64IM-NEXT: lui t6, 4096 +; RV64IM-NEXT: lui s5, 8192 +; RV64IM-NEXT: lui s7, 4080 +; RV64IM-NEXT: and a2, a4, s7 +; RV64IM-NEXT: slli ra, s4, 24 +; RV64IM-NEXT: addi s10, s8, -256 +; RV64IM-NEXT: and a4, a6, ra +; RV64IM-NEXT: sd ra, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a4, a2 +; RV64IM-NEXT: and a4, a0, s7 +; RV64IM-NEXT: slli t3, t3, 32 +; RV64IM-NEXT: addi s3, s3, -241 +; RV64IM-NEXT: addi s4, t5, 819 +; RV64IM-NEXT: addi s6, s6, 1365 +; RV64IM-NEXT: and a6, s9, s7 +; RV64IM-NEXT: and a5, a5, s10 +; RV64IM-NEXT: or a5, a5, t1 +; RV64IM-NEXT: and t1, a1, s7 +; RV64IM-NEXT: slli t5, s11, 32 +; RV64IM-NEXT: slli a4, a4, 24 +; RV64IM-NEXT: or s9, a4, t3 +; RV64IM-NEXT: slli a4, s3, 32 +; RV64IM-NEXT: add s3, s3, a4 +; RV64IM-NEXT: slli a4, s4, 32 +; RV64IM-NEXT: add s4, s4, a4 +; RV64IM-NEXT: slli a4, s6, 32 +; RV64IM-NEXT: add s6, s6, a4 +; RV64IM-NEXT: slli t3, t0, 11 +; RV64IM-NEXT: and a4, s0, ra +; RV64IM-NEXT: or a4, a4, a6 +; RV64IM-NEXT: slli s11, t0, 32 +; RV64IM-NEXT: and a6, a7, s10 +; RV64IM-NEXT: or a6, a6, t2 +; RV64IM-NEXT: slli ra, t0, 33 +; RV64IM-NEXT: slli t1, t1, 24 +; RV64IM-NEXT: or a7, t1, t5 +; RV64IM-NEXT: slli s0, t0, 34 +; RV64IM-NEXT: or a2, a2, a5 +; RV64IM-NEXT: slli a5, t0, 35 +; RV64IM-NEXT: sd a5, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, t4, a0 +; RV64IM-NEXT: slli a5, t0, 36 +; RV64IM-NEXT: sd a5, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a4, a4, a6 +; RV64IM-NEXT: slli a6, t0, 37 +; RV64IM-NEXT: and a1, a1, s10 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: slli a3, t0, 38 +; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a0, a0, s9 +; RV64IM-NEXT: or a1, a1, a7 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s3 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s3 +; RV64IM-NEXT: and a2, a2, 
s3 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s3 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s4, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s6, a3, a1 +; RV64IM-NEXT: andi a1, s6, 2 +; RV64IM-NEXT: andi a2, s6, 1 +; RV64IM-NEXT: andi a3, s6, 4 +; RV64IM-NEXT: andi a4, s6, 8 +; RV64IM-NEXT: andi a5, s6, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a4 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s6, 256 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a7, t0, 39 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 40 +; RV64IM-NEXT: and a2, s6, s1 +; RV64IM-NEXT: and a3, s6, s2 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 41 +; RV64IM-NEXT: and a3, s6, t6 +; RV64IM-NEXT: and a4, s6, s5 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t0, 48 +; RV64IM-NEXT: and a4, s6, s11 +; RV64IM-NEXT: and a5, s6, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t0, 49 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 56 +; RV64IM-NEXT: and a2, s6, a3 +; RV64IM-NEXT: and a3, s6, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 57 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 42 +; RV64IM-NEXT: slli ra, t0, 43 +; RV64IM-NEXT: slli a4, t0, 44 +; RV64IM-NEXT: slli t6, t0, 45 +; RV64IM-NEXT: slli s1, t0, 
46 +; RV64IM-NEXT: slli s2, t0, 47 +; RV64IM-NEXT: slli s3, t0, 50 +; RV64IM-NEXT: slli s4, t0, 51 +; RV64IM-NEXT: slli s5, t0, 52 +; RV64IM-NEXT: slli a1, t0, 53 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 54 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 55 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 58 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 59 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 60 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 61 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t0, t0, 62 +; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 32 +; RV64IM-NEXT: and a1, s6, s9 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s6, s11 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 1024 +; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 2048 +; RV64IM-NEXT: and a1, s6, s7 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t1, 16384 +; RV64IM-NEXT: and a1, s6, t1 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t2, 32768 +; RV64IM-NEXT: and t2, s6, t2 +; RV64IM-NEXT: lui t3, 65536 +; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t4, 131072 +; RV64IM-NEXT: and a5, s6, t4 +; RV64IM-NEXT: lui t5, 262144 +; RV64IM-NEXT: and t0, s6, t5 +; RV64IM-NEXT: and s11, s6, s0 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a6 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a2 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s6, ra +; RV64IM-NEXT: and a1, s6, a4 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t6 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s2 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, s6, s4 +; RV64IM-NEXT: and s1, s6, s5 +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, s6, a1 +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, s6, a1 +; 
RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s6, a1 +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, s6, a1 +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s6, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s6, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s6, a1 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s6, a1 +; RV64IM-NEXT: andi a1, s6, 64 +; RV64IM-NEXT: andi a2, s6, 128 +; RV64IM-NEXT: andi a3, s6, 1024 +; RV64IM-NEXT: srliw a4, s6, 31 +; RV64IM-NEXT: srli s6, s6, 63 +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t3, a0, a3 +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: mul t5, a0, t2 +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a5 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t0 +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a3, a0, s11 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: mul a5, a0, ra +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a1 +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 0(sp) # 
8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul s0, a0, s0 +; RV64IM-NEXT: mul s1, a0, s1 +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul s3, a0, s3 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: sd s4, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli s6, s6, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, s6 +; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, s4 +; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, s4, t4 +; RV64IM-NEXT: ld s4, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, s4, t3 +; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s4, t1 +; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, s4, a7 +; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, s4, a6 +; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s4, a3 +; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, s4, a2 +; RV64IM-NEXT: ld s4, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, s4, a1 +; RV64IM-NEXT: ld s4, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, s4, s5 +; RV64IM-NEXT: xor t4, s6, t4 +; RV64IM-NEXT: ld s4, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, s4 +; RV64IM-NEXT: ld s4, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, s4 +; RV64IM-NEXT: xor a7, a7, t6 +; RV64IM-NEXT: xor a6, a6, t5 +; RV64IM-NEXT: xor a3, a3, t0 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a1, a1, s0 +; RV64IM-NEXT: xor a5, s5, s7 +; RV64IM-NEXT: ld t0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, t0 +; RV64IM-NEXT: ld t4, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, t4 +; RV64IM-NEXT: ld t4, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t4 +; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t4 +; RV64IM-NEXT: ld t4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t4 +; RV64IM-NEXT: xor a2, a2, t2 +; RV64IM-NEXT: xor a1, a1, s1 +; RV64IM-NEXT: xor a5, a5, s8 +; RV64IM-NEXT: ld t2, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t2 +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: xor a5, a5, s9 +; RV64IM-NEXT: xor t2, t0, t3 +; RV64IM-NEXT: xor t1, t2, t1 +; RV64IM-NEXT: ld t2, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t2 +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: xor a2, a2, ra +; RV64IM-NEXT: xor a1, a1, s3 +; RV64IM-NEXT: xor a5, a5, s10 +; RV64IM-NEXT: xor a7, t1, a7 +; RV64IM-NEXT: xor a4, a6, a4 +; RV64IM-NEXT: ld a6, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a6 +; RV64IM-NEXT: ld a6, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a6 +; RV64IM-NEXT: slli t0, t0, 56 +; RV64IM-NEXT: ld a6, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: xor a0, a5, a0 +; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a5, a7, t1 +; 
RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: slli a5, a5, 40 +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: or a4, t0, a5 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a5, a3, a7 +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: srli a3, a3, 8 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: ld a6, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a6 +; RV64IM-NEXT: srli a2, a2, 24 +; RV64IM-NEXT: srliw a6, a1, 24 +; RV64IM-NEXT: and a2, a2, a7 +; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a6, a6, 32 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a1, a7, t1 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a3, a5, a6 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a4, a3 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: andi a1, a0, 5 +; RV64IM-NEXT: srli a0, a0, 1 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: andi a0, a0, 5 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 480 +; RV64IM-NEXT: ret + %a.ext = zext i4 %a to i8 + %b.ext = zext i4 %b to i8 + %clmul = call i8 @llvm.clmul.i8(i8 %a.ext, i8 %b.ext) + %res.ext = lshr i8 %clmul, 3 + %res = trunc i8 %res.ext to i4 + ret i4 %res +} + +define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { +; RV32IM-LABEL: clmulr_i4_bitreverse: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui a3, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui t3, 61681 +; RV32IM-NEXT: lui t5, 209715 +; RV32IM-NEXT: lui t6, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli a4, a1, 24 +; RV32IM-NEXT: slli a5, a1, 24 +; RV32IM-NEXT: li s7, 1 +; RV32IM-NEXT: lui t2, 4 +; 
RV32IM-NEXT: lui s0, 8 +; RV32IM-NEXT: lui s1, 32 +; RV32IM-NEXT: lui s2, 64 +; RV32IM-NEXT: lui s3, 128 +; RV32IM-NEXT: lui s4, 256 +; RV32IM-NEXT: lui s8, 512 +; RV32IM-NEXT: lui a7, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: lui s11, 8192 +; RV32IM-NEXT: lui ra, 16384 +; RV32IM-NEXT: addi s5, a3, -256 +; RV32IM-NEXT: and t0, t0, s5 +; RV32IM-NEXT: or t1, t0, t1 +; RV32IM-NEXT: lui a6, 32768 +; RV32IM-NEXT: and t4, t4, s5 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: lui t0, 65536 +; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: lui a2, 131072 +; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or t4, a5, a1 +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: or a0, a0, t1 +; RV32IM-NEXT: lui a5, 524288 +; RV32IM-NEXT: addi t3, t3, -241 +; RV32IM-NEXT: addi t5, t5, 819 +; RV32IM-NEXT: addi t6, t6, 1365 +; RV32IM-NEXT: slli s7, s7, 11 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: srli t4, a0, 4 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, t4, a0 +; RV32IM-NEXT: srli t4, a4, 4 +; RV32IM-NEXT: and a4, a4, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a4, a4, 4 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: srli t4, a0, 2 +; RV32IM-NEXT: and a0, a0, t5 +; RV32IM-NEXT: and t4, t4, t5 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, t4, a0 +; RV32IM-NEXT: srli t4, a4, 2 +; RV32IM-NEXT: and a4, a4, t5 +; RV32IM-NEXT: and t4, t4, t5 +; RV32IM-NEXT: slli a4, a4, 2 +; RV32IM-NEXT: or t4, t4, a4 +; RV32IM-NEXT: srli a4, a0, 1 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: and a4, a4, t6 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a4, a4, a0 +; RV32IM-NEXT: srli a0, t4, 1 +; RV32IM-NEXT: and t4, t4, t6 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: slli t4, t4, 1 +; RV32IM-NEXT: or a0, a0, t4 +; RV32IM-NEXT: andi t4, a0, 2 +; RV32IM-NEXT: and s6, a0, s7 +; RV32IM-NEXT: lui t1, 1 +; RV32IM-NEXT: and t1, a0, t1 +; RV32IM-NEXT: sw t1, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui t1, 2 +; RV32IM-NEXT: and t1, a0, t1 +; RV32IM-NEXT: sw t1, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t1, a0, t2 +; RV32IM-NEXT: sw t1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: and a3, a0, a3 +; RV32IM-NEXT: sw a3, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s2 +; RV32IM-NEXT: sw a3, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s3, a0, s3 +; RV32IM-NEXT: and a3, a0, s4 +; RV32IM-NEXT: sw a3, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s8 +; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a7 +; RV32IM-NEXT: sw a3, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s9, a0, s9 +; RV32IM-NEXT: and a3, a0, s10 +; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s11 +; RV32IM-NEXT: sw a3, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, ra +; RV32IM-NEXT: sw a3, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a6 +; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, t0 +; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: 
andi a1, a0, 1 +; RV32IM-NEXT: andi a2, a0, 4 +; RV32IM-NEXT: andi a3, a0, 8 +; RV32IM-NEXT: andi a5, a0, 16 +; RV32IM-NEXT: andi a6, a0, 32 +; RV32IM-NEXT: andi a7, a0, 64 +; RV32IM-NEXT: andi t0, a0, 128 +; RV32IM-NEXT: andi t1, a0, 256 +; RV32IM-NEXT: andi t2, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul t4, a4, t4 +; RV32IM-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul ra, a4, a1 +; RV32IM-NEXT: mul s11, a4, a2 +; RV32IM-NEXT: mul s8, a4, a3 +; RV32IM-NEXT: mul s7, a4, a5 +; RV32IM-NEXT: mul s4, a4, a6 +; RV32IM-NEXT: mul a1, a4, a7 +; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a4, t0 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s2, a4, t1 +; RV32IM-NEXT: mul t2, a4, t2 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a4, s6 +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a4, a0 +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a4, a0 +; RV32IM-NEXT: mul s1, a4, s0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a3, a4, s3 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a4, a0 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a6, a4, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a4, a0 +; RV32IM-NEXT: mul s6, a4, s9 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a4, a0 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a4, a5 +; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a4, t0 +; RV32IM-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a4, s0 +; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a4, s3 +; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s9, a4, s9 +; RV32IM-NEXT: lw s10, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a4, s10 +; RV32IM-NEXT: lw s10, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor ra, ra, s10 +; RV32IM-NEXT: xor s8, s11, s8 +; RV32IM-NEXT: xor s4, s7, s4 +; RV32IM-NEXT: xor t2, s2, t2 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, ra, s8 +; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s4, a3 +; RV32IM-NEXT: lw t1, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t2, t1 +; RV32IM-NEXT: xor a7, a7, s1 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, t1, a3 +; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a7, a5 +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; 
RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a6 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s6 +; RV32IM-NEXT: xor a0, a0, s0 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s3 +; RV32IM-NEXT: xor a3, a1, a3 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: xor a0, a0, s9 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: and a3, a2, s5 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s5 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and a1, a1, t3 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, t5 +; RV32IM-NEXT: and a1, a1, t5 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: and a1, a1, t6 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i4_bitreverse: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -496 +; RV64IM-NEXT: sd ra, 488(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 480(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a5, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li s4, 255 +; RV64IM-NEXT: srli a4, a0, 40 +; RV64IM-NEXT: lui s10, 16 +; RV64IM-NEXT: srli a7, a0, 56 +; RV64IM-NEXT: srliw t2, a0, 24 +; RV64IM-NEXT: slli t3, a0, 56 +; RV64IM-NEXT: lui t4, 61681 +; RV64IM-NEXT: lui s6, 209715 +; RV64IM-NEXT: lui s5, 349525 +; RV64IM-NEXT: srli s3, a1, 24 +; RV64IM-NEXT: srli t6, a1, 8 +; RV64IM-NEXT: srli ra, a1, 40 +; RV64IM-NEXT: srli t0, a1, 56 +; RV64IM-NEXT: srliw s7, a1, 24 +; RV64IM-NEXT: slli a3, a1, 56 +; RV64IM-NEXT: li t1, 1 +; RV64IM-NEXT: lui s1, 256 +; RV64IM-NEXT: lui s2, 4096 +; RV64IM-NEXT: lui s0, 8192 +; RV64IM-NEXT: lui s9, 4080 +; RV64IM-NEXT: and a2, a5, s9 +; 
RV64IM-NEXT: slli t5, s4, 24 +; RV64IM-NEXT: addi s11, s10, -256 +; RV64IM-NEXT: and a5, a6, t5 +; RV64IM-NEXT: sd t5, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a5, a2 +; RV64IM-NEXT: and a5, a0, s9 +; RV64IM-NEXT: slli t2, t2, 32 +; RV64IM-NEXT: addi s4, t4, -241 +; RV64IM-NEXT: addi s6, s6, 819 +; RV64IM-NEXT: addi s8, s5, 1365 +; RV64IM-NEXT: and a6, s3, s9 +; RV64IM-NEXT: and a4, a4, s11 +; RV64IM-NEXT: or a4, a4, a7 +; RV64IM-NEXT: and a7, a1, s9 +; RV64IM-NEXT: slli t4, s7, 32 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: or s5, a5, t2 +; RV64IM-NEXT: slli a5, s4, 32 +; RV64IM-NEXT: add s4, s4, a5 +; RV64IM-NEXT: slli a5, s6, 32 +; RV64IM-NEXT: add s6, s6, a5 +; RV64IM-NEXT: slli a5, s8, 32 +; RV64IM-NEXT: add s8, s8, a5 +; RV64IM-NEXT: slli s3, t1, 11 +; RV64IM-NEXT: and a5, t6, t5 +; RV64IM-NEXT: or a5, a5, a6 +; RV64IM-NEXT: slli t2, t1, 32 +; RV64IM-NEXT: and a6, ra, s11 +; RV64IM-NEXT: or a6, a6, t0 +; RV64IM-NEXT: slli ra, t1, 33 +; RV64IM-NEXT: slli a7, a7, 24 +; RV64IM-NEXT: or a7, a7, t4 +; RV64IM-NEXT: slli s7, t1, 34 +; RV64IM-NEXT: or a2, a2, a4 +; RV64IM-NEXT: slli a4, t1, 35 +; RV64IM-NEXT: sd a4, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s11 +; RV64IM-NEXT: sd s11, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, t3, a0 +; RV64IM-NEXT: slli a4, t1, 36 +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a4, a5, a6 +; RV64IM-NEXT: slli a6, t1, 37 +; RV64IM-NEXT: and a1, a1, s11 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: or a0, a0, s5 +; RV64IM-NEXT: or a1, a1, a7 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s4, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s6, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: sd s8, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s8 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, s8 +; RV64IM-NEXT: and a2, a2, s8 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, s8 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s5, a3, a1 +; RV64IM-NEXT: andi a1, s5, 2 +; RV64IM-NEXT: andi a2, s5, 1 +; RV64IM-NEXT: andi a3, s5, 4 +; RV64IM-NEXT: andi a4, s5, 8 +; RV64IM-NEXT: andi a5, s5, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s5, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a4 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s5, 256 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s5, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 
+; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, t1, 38 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 40 +; RV64IM-NEXT: lui a2, 128 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: and a3, s5, s1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 41 +; RV64IM-NEXT: and a3, s5, s2 +; RV64IM-NEXT: and a4, s5, s0 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t1, 48 +; RV64IM-NEXT: and a4, s5, t2 +; RV64IM-NEXT: and a5, s5, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t1, 49 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 56 +; RV64IM-NEXT: and a2, s5, a3 +; RV64IM-NEXT: and a3, s5, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 57 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 39 +; RV64IM-NEXT: slli ra, t1, 42 +; RV64IM-NEXT: slli a4, t1, 43 +; RV64IM-NEXT: slli a5, t1, 44 +; RV64IM-NEXT: slli s0, t1, 45 +; RV64IM-NEXT: slli s1, t1, 46 +; RV64IM-NEXT: slli s2, t1, 47 +; RV64IM-NEXT: slli s6, t1, 50 +; RV64IM-NEXT: slli a1, t1, 51 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 52 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 53 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 54 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 55 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 58 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 59 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 60 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 61 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, t1, 62 +; RV64IM-NEXT: sd t1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, s5, s3 +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s5, a3 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s5, a3 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s10 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 32 +; RV64IM-NEXT: and a1, s5, s8 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s5, s11 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s5, s10 +; RV64IM-NEXT: 
sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 1024 +; RV64IM-NEXT: and a1, s5, s9 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t0, 2048 +; RV64IM-NEXT: and a1, s5, t0 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t2, 16384 +; RV64IM-NEXT: and a1, s5, t2 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t3, 32768 +; RV64IM-NEXT: and t3, s5, t3 +; RV64IM-NEXT: lui t4, 65536 +; RV64IM-NEXT: and a1, s5, t4 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t5, 131072 +; RV64IM-NEXT: and a7, s5, t5 +; RV64IM-NEXT: lui t6, 262144 +; RV64IM-NEXT: and t6, s5, t6 +; RV64IM-NEXT: and s11, s5, s7 +; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a6 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s4 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a2 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s5, ra +; RV64IM-NEXT: and a1, s5, a4 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a5 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s0 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s2 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s6 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s0, s5, a1 +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, s5, a1 +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, s5, a1 +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, s5, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s5, a1 +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, s5, a1 +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s5, a1 +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s5, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s5, a1 +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s5, a1 +; RV64IM-NEXT: andi a1, s5, 64 +; RV64IM-NEXT: andi a2, s5, 128 +; RV64IM-NEXT: andi a3, s5, 1024 +; RV64IM-NEXT: srliw a4, s5, 31 +; RV64IM-NEXT: srli s5, s5, 63 +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t2, a0, a3 +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload 
+; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a5, a0, a1 +; RV64IM-NEXT: mul t3, a0, t3 +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a7 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t6 +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a2, a0, s11 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul ra, a0, ra +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, a0, a1 +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul s0, a0, s0 +; RV64IM-NEXT: mul s1, a0, s1 +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul s3, a0, s3 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: sd s4, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli s5, s5, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, s5 +; RV64IM-NEXT: ld s5, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, s5, s4 +; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, s4, t4 +; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s4, t2 +; RV64IM-NEXT: ld s4, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, s4, t0 +; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, s4, a6 +; RV64IM-NEXT: ld s4, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s4, a5 +; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, s4, a2 +; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor ra, s4, ra +; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, s4, a1 +; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s4, s6 +; RV64IM-NEXT: xor t4, s5, t4 +; RV64IM-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: xor t2, t2, s4 +; RV64IM-NEXT: ld s4, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, s4 +; RV64IM-NEXT: xor a6, a6, t5 +; RV64IM-NEXT: xor a5, a5, t3 +; RV64IM-NEXT: xor a2, a2, a7 +; RV64IM-NEXT: xor a3, ra, a3 +; RV64IM-NEXT: xor a1, a1, s0 +; RV64IM-NEXT: xor a7, s6, s7 +; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t4, t3 +; RV64IM-NEXT: ld t4, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, t4 +; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, t4 +; RV64IM-NEXT: ld t4, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t4 +; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t4 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: xor a1, a1, s1 +; RV64IM-NEXT: xor a7, a7, s8 +; RV64IM-NEXT: ld t1, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, t1 +; RV64IM-NEXT: ld t1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t1 +; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: xor a3, a3, t6 +; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: xor a7, a7, s9 +; RV64IM-NEXT: xor t1, t3, t2 +; RV64IM-NEXT: xor t0, t1, t0 +; RV64IM-NEXT: ld t1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t1 +; RV64IM-NEXT: ld t1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: ld t1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: xor a1, a1, s3 +; RV64IM-NEXT: xor a7, a7, s10 +; RV64IM-NEXT: xor a6, t0, a6 +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: ld a5, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: ld a5, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a5 +; RV64IM-NEXT: slli t3, t3, 56 +; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a5 +; RV64IM-NEXT: xor a0, a7, a0 +; RV64IM-NEXT: ld t0, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a5, a6, t0 +; RV64IM-NEXT: xor a4, a6, a4 +; RV64IM-NEXT: slli a5, a5, 40 +; RV64IM-NEXT: xor a2, a4, a2 +; RV64IM-NEXT: or a4, t3, a5 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a5, a2, a7 +; RV64IM-NEXT: xor a3, a2, a3 +; RV64IM-NEXT: srli a2, a2, 8 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: ld a6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a2, a2, a6 +; RV64IM-NEXT: srli a3, a3, 24 +; RV64IM-NEXT: srliw a6, a1, 24 +; RV64IM-NEXT: and a3, a3, a7 +; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a6, a6, 32 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: and a1, a7, t0 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a3, a5, a6 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a4, a3 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: ld ra, 488(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: ld s0, 480(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 496 +; RV64IM-NEXT: ret + %a.rev = call i4 @llvm.bitreverse.i4(i4 %a) + %b.rev = call i4 @llvm.bitreverse.i4(i4 %b) + %res.rev = call i4 @llvm.clmul.i4(i4 %a.rev, i4 %b.rev) + %res = call i4 @llvm.bitreverse.i4(i4 %res.rev) + ret i4 %res +} + +define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { +; RV32IM-LABEL: clmulr_i8: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui a3, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui t2, 61681 +; RV32IM-NEXT: lui t3, 209715 +; RV32IM-NEXT: lui a7, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli t5, a1, 24 +; RV32IM-NEXT: slli a4, a1, 24 +; RV32IM-NEXT: li t6, 1 +; RV32IM-NEXT: lui s0, 4 +; RV32IM-NEXT: lui s1, 8 +; RV32IM-NEXT: lui s2, 32 +; RV32IM-NEXT: lui s3, 64 +; RV32IM-NEXT: lui s5, 128 +; RV32IM-NEXT: lui s6, 256 +; RV32IM-NEXT: lui s7, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: lui s11, 8192 +; RV32IM-NEXT: lui ra, 16384 +; RV32IM-NEXT: addi s4, a3, -256 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: and t0, t0, s4 +; RV32IM-NEXT: or a3, t0, t1 +; RV32IM-NEXT: lui t0, 32768 +; RV32IM-NEXT: and t1, t4, s4 +; RV32IM-NEXT: or t4, t1, t5 +; RV32IM-NEXT: lui a6, 65536 +; RV32IM-NEXT: and a0, a0, s4 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or t5, a2, a0 +; RV32IM-NEXT: lui a2, 131072 +; RV32IM-NEXT: and a1, a1, s4 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, a4, a1 +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: addi t2, t2, -241 +; RV32IM-NEXT: addi t3, t3, 819 +; RV32IM-NEXT: addi a7, a7, 1365 +; RV32IM-NEXT: or a3, t5, a3 +; RV32IM-NEXT: or a0, a0, t4 +; RV32IM-NEXT: srli t4, a3, 4 +; RV32IM-NEXT: and a3, a3, t2 +; RV32IM-NEXT: srli t5, a0, 4 +; RV32IM-NEXT: and a0, a0, t2 +; RV32IM-NEXT: and t4, t4, t2 +; RV32IM-NEXT: slli a3, a3, 4 +; RV32IM-NEXT: and t5, t5, t2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t4, a3, 2 +; RV32IM-NEXT: and a3, a3, t3 +; RV32IM-NEXT: srli t5, a0, 2 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a3, a3, 2 
+; RV32IM-NEXT: and t5, t5, t3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t4, a3, 1 +; RV32IM-NEXT: and a3, a3, a7 +; RV32IM-NEXT: srli t5, a0, 1 +; RV32IM-NEXT: and a0, a0, a7 +; RV32IM-NEXT: and t4, t4, a7 +; RV32IM-NEXT: and a7, t5, a7 +; RV32IM-NEXT: lui a4, 524288 +; RV32IM-NEXT: slli t6, t6, 11 +; RV32IM-NEXT: slli a3, a3, 1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, a7, a0 +; RV32IM-NEXT: andi t5, a0, 2 +; RV32IM-NEXT: andi t4, a0, 1 +; RV32IM-NEXT: and t6, a0, t6 +; RV32IM-NEXT: lui a7, 1 +; RV32IM-NEXT: and a7, a0, a7 +; RV32IM-NEXT: sw a7, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a7, 2 +; RV32IM-NEXT: and a7, a0, a7 +; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: sw s0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s2, a0, s2 +; RV32IM-NEXT: and a5, a0, s3 +; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s5 +; RV32IM-NEXT: sw a5, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s6 +; RV32IM-NEXT: sw a5, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s7, a0, s7 +; RV32IM-NEXT: and s8, a0, s8 +; RV32IM-NEXT: and a5, a0, s9 +; RV32IM-NEXT: sw a5, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s10 +; RV32IM-NEXT: sw a5, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s11 +; RV32IM-NEXT: sw a5, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, ra +; RV32IM-NEXT: sw a5, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, t0 +; RV32IM-NEXT: sw a5, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a6 +; RV32IM-NEXT: sw a5, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, a4 +; RV32IM-NEXT: sw a4, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a0, 4 +; RV32IM-NEXT: andi a2, a0, 8 +; RV32IM-NEXT: andi a4, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a6, a0, 64 +; RV32IM-NEXT: andi a7, a0, 128 +; RV32IM-NEXT: andi t0, a0, 256 +; RV32IM-NEXT: andi t1, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul t5, a3, t5 +; RV32IM-NEXT: sw t5, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, a3, t4 +; RV32IM-NEXT: mul a1, a3, a1 +; RV32IM-NEXT: sw a1, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s6, a3, a2 +; RV32IM-NEXT: mul s5, a3, a4 +; RV32IM-NEXT: mul s3, a3, a5 +; RV32IM-NEXT: mul a1, a3, a6 +; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a3, a7 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s0, a3, t0 +; RV32IM-NEXT: mul t5, a3, t1 +; RV32IM-NEXT: mul s11, a3, a0 +; RV32IM-NEXT: mul a0, a3, t6 +; RV32IM-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a3, a0 +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a3, a0 +; RV32IM-NEXT: mul s1, a3, s1 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, a3, a0 +; RV32IM-NEXT: mul a0, a3, s2 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 68(sp) # 
4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a3, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a3, a0 +; RV32IM-NEXT: mul a6, a3, s7 +; RV32IM-NEXT: mul t4, a3, s8 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a3, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a3, a0 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: lw a5, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a3, a5 +; RV32IM-NEXT: lw t0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a3, t0 +; RV32IM-NEXT: lw t6, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a3, t6 +; RV32IM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a3, s2 +; RV32IM-NEXT: lw s8, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s8, a3, s8 +; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a3, s9 +; RV32IM-NEXT: lw s9, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s10, s10, s9 +; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s6, s9, s6 +; RV32IM-NEXT: xor s3, s5, s3 +; RV32IM-NEXT: xor t5, s0, t5 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, s10, s6 +; RV32IM-NEXT: lw a4, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, s3, a4 +; RV32IM-NEXT: xor t1, t5, s11 +; RV32IM-NEXT: xor a7, a7, s1 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, a1, a4 +; RV32IM-NEXT: lw a4, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, t1, a4 +; RV32IM-NEXT: xor a5, a7, ra +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a6 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s7 +; RV32IM-NEXT: xor a0, a0, t6 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s2 +; RV32IM-NEXT: xor a4, a1, a4 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a0, a0, s8 +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a0, a0, a3 +; RV32IM-NEXT: and a3, a2, s4 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s4 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, t2 +; RV32IM-NEXT: and a1, a1, t2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and a1, a1, t3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: andi a1, a0, 85 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: slli a1, a1, 1 +; RV32IM-NEXT: andi a0, a0, 85 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; 
RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i8: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -480 +; RV64IM-NEXT: sd ra, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a4, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li s4, 255 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s8, 16 +; RV64IM-NEXT: srli t1, a0, 56 +; RV64IM-NEXT: srliw t3, a0, 24 +; RV64IM-NEXT: slli t4, a0, 56 +; RV64IM-NEXT: lui s3, 61681 +; RV64IM-NEXT: lui t5, 209715 +; RV64IM-NEXT: lui s6, 349525 +; RV64IM-NEXT: srli s9, a1, 24 +; RV64IM-NEXT: srli s0, a1, 8 +; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srli t2, a1, 56 +; RV64IM-NEXT: srliw s11, a1, 24 +; RV64IM-NEXT: slli a3, a1, 56 +; RV64IM-NEXT: li t0, 1 +; RV64IM-NEXT: lui s1, 128 +; RV64IM-NEXT: lui s2, 256 +; RV64IM-NEXT: lui t6, 4096 +; RV64IM-NEXT: lui s5, 8192 +; RV64IM-NEXT: lui s7, 4080 +; RV64IM-NEXT: and a2, a4, s7 +; RV64IM-NEXT: slli ra, s4, 24 +; RV64IM-NEXT: addi s10, s8, -256 +; RV64IM-NEXT: and a4, a6, ra +; RV64IM-NEXT: sd ra, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a4, a2 +; RV64IM-NEXT: and a4, a0, s7 +; RV64IM-NEXT: slli t3, t3, 32 +; RV64IM-NEXT: addi s3, s3, -241 +; RV64IM-NEXT: addi s4, t5, 819 +; RV64IM-NEXT: addi s6, s6, 1365 +; RV64IM-NEXT: and a6, s9, s7 +; RV64IM-NEXT: and a5, a5, s10 +; RV64IM-NEXT: or a5, a5, t1 +; RV64IM-NEXT: and t1, a1, s7 +; RV64IM-NEXT: slli t5, s11, 32 +; RV64IM-NEXT: slli a4, a4, 24 +; RV64IM-NEXT: or s9, a4, t3 +; RV64IM-NEXT: slli a4, s3, 32 +; RV64IM-NEXT: add s3, s3, a4 +; RV64IM-NEXT: slli a4, s4, 32 +; RV64IM-NEXT: add s4, s4, a4 +; RV64IM-NEXT: slli a4, s6, 32 +; RV64IM-NEXT: add s6, s6, a4 +; RV64IM-NEXT: slli t3, t0, 11 +; RV64IM-NEXT: and a4, s0, ra +; RV64IM-NEXT: or a4, a4, a6 +; RV64IM-NEXT: slli s11, t0, 32 +; RV64IM-NEXT: and a6, a7, s10 +; RV64IM-NEXT: or a6, a6, t2 +; RV64IM-NEXT: slli ra, t0, 33 +; RV64IM-NEXT: slli t1, t1, 24 +; RV64IM-NEXT: or a7, t1, t5 +; RV64IM-NEXT: slli s0, t0, 34 +; RV64IM-NEXT: or a2, a2, a5 +; RV64IM-NEXT: slli a5, t0, 35 +; RV64IM-NEXT: sd a5, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, t4, a0 +; RV64IM-NEXT: slli a5, t0, 36 +; RV64IM-NEXT: sd a5, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a4, a4, a6 +; RV64IM-NEXT: slli a6, t0, 37 +; RV64IM-NEXT: and a1, a1, s10 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: slli a3, t0, 38 +; RV64IM-NEXT: 
sd a3, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a0, a0, s9 +; RV64IM-NEXT: or a1, a1, a7 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s3 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s3 +; RV64IM-NEXT: and a2, a2, s3 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s3 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s4, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s6, a3, a1 +; RV64IM-NEXT: andi a1, s6, 2 +; RV64IM-NEXT: andi a2, s6, 1 +; RV64IM-NEXT: andi a3, s6, 4 +; RV64IM-NEXT: andi a4, s6, 8 +; RV64IM-NEXT: andi a5, s6, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a4 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s6, 256 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a7, t0, 39 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 40 +; RV64IM-NEXT: and a2, s6, s1 +; RV64IM-NEXT: and a3, s6, s2 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 41 +; RV64IM-NEXT: and a3, s6, t6 +; RV64IM-NEXT: and a4, s6, s5 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t0, 48 +; RV64IM-NEXT: and a4, s6, s11 +; RV64IM-NEXT: and a5, s6, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t0, 49 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 56 +; RV64IM-NEXT: and a2, s6, a3 +; RV64IM-NEXT: and a3, s6, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 57 +; 
RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 42 +; RV64IM-NEXT: slli ra, t0, 43 +; RV64IM-NEXT: slli a4, t0, 44 +; RV64IM-NEXT: slli t6, t0, 45 +; RV64IM-NEXT: slli s1, t0, 46 +; RV64IM-NEXT: slli s2, t0, 47 +; RV64IM-NEXT: slli s3, t0, 50 +; RV64IM-NEXT: slli s4, t0, 51 +; RV64IM-NEXT: slli s5, t0, 52 +; RV64IM-NEXT: slli a1, t0, 53 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 54 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 55 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 58 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 59 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 60 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 61 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t0, t0, 62 +; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 32 +; RV64IM-NEXT: and a1, s6, s9 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s6, s11 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 1024 +; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 2048 +; RV64IM-NEXT: and a1, s6, s7 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t1, 16384 +; RV64IM-NEXT: and a1, s6, t1 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t2, 32768 +; RV64IM-NEXT: and t2, s6, t2 +; RV64IM-NEXT: lui t3, 65536 +; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t4, 131072 +; RV64IM-NEXT: and a5, s6, t4 +; RV64IM-NEXT: lui t5, 262144 +; RV64IM-NEXT: and t0, s6, t5 +; RV64IM-NEXT: and s11, s6, s0 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a6 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a2 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s6, ra +; RV64IM-NEXT: and a1, s6, a4 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t6 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s2 +; RV64IM-NEXT: 
sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, s6, s4 +; RV64IM-NEXT: and s1, s6, s5 +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, s6, a1 +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, s6, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s6, a1 +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, s6, a1 +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s6, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s6, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s6, a1 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s6, a1 +; RV64IM-NEXT: andi a1, s6, 64 +; RV64IM-NEXT: andi a2, s6, 128 +; RV64IM-NEXT: andi a3, s6, 1024 +; RV64IM-NEXT: srliw a4, s6, 31 +; RV64IM-NEXT: srli s6, s6, 63 +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t3, a0, a3 +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: mul t5, a0, t2 +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a5 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t0 +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a3, a0, s11 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: mul a5, a0, ra +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: 
mul t2, a0, a1 +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul s0, a0, s0 +; RV64IM-NEXT: mul s1, a0, s1 +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul s3, a0, s3 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: sd s4, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli s6, s6, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, s6 +; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, s4 +; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, s4, t4 +; RV64IM-NEXT: ld s4, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, s4, t3 +; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s4, t1 +; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, s4, a7 +; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, s4, a6 +; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s4, a3 +; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, s4, a2 +; RV64IM-NEXT: ld s4, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, s4, a1 +; RV64IM-NEXT: ld s4, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, s4, s5 +; RV64IM-NEXT: xor t4, s6, t4 +; RV64IM-NEXT: ld s4, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, s4 +; RV64IM-NEXT: ld s4, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, s4 +; RV64IM-NEXT: xor a7, a7, t6 +; RV64IM-NEXT: xor a6, a6, t5 +; RV64IM-NEXT: xor a3, a3, t0 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a1, a1, s0 +; RV64IM-NEXT: xor a5, s5, s7 +; RV64IM-NEXT: ld t0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, t0 +; RV64IM-NEXT: ld t4, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, t4 +; RV64IM-NEXT: ld t4, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t4 +; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t4 +; RV64IM-NEXT: ld t4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t4 +; RV64IM-NEXT: xor a2, a2, t2 +; RV64IM-NEXT: xor a1, a1, s1 +; RV64IM-NEXT: xor a5, a5, s8 +; RV64IM-NEXT: ld t2, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t2 +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: xor a5, a5, s9 +; RV64IM-NEXT: xor t2, t0, t3 +; RV64IM-NEXT: xor t1, t2, t1 +; RV64IM-NEXT: ld t2, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t2 +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: xor a2, a2, ra +; RV64IM-NEXT: xor a1, a1, s3 +; RV64IM-NEXT: xor a5, a5, s10 +; RV64IM-NEXT: xor a7, t1, a7 +; RV64IM-NEXT: xor a4, a6, a4 +; RV64IM-NEXT: ld a6, 280(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: xor a3, a3, a6 +; RV64IM-NEXT: ld a6, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a6 +; RV64IM-NEXT: slli t0, t0, 56 +; RV64IM-NEXT: ld a6, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: xor a0, a5, a0 +; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a5, a7, t1 +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: slli a5, a5, 40 +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: or a4, t0, a5 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a5, a3, a7 +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: srli a3, a3, 8 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: ld a6, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a6 +; RV64IM-NEXT: srli a2, a2, 24 +; RV64IM-NEXT: srliw a6, a1, 24 +; RV64IM-NEXT: and a2, a2, a7 +; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a6, a6, 32 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a1, a7, t1 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a3, a5, a6 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a4, a3 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: andi a1, a0, 85 +; RV64IM-NEXT: srli a0, a0, 1 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: andi a0, a0, 85 +; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 480 +; RV64IM-NEXT: ret + %a.ext = zext i8 %a to i16 + %b.ext = zext i8 %b to i16 + %clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext) + %res.ext = lshr i16 %clmul, 7 + %res = trunc i16 %res.ext to i8 + ret i8 %res +} + +define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { +; RV32IM-LABEL: clmulr_i16: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, 
a0, 8 +; RV32IM-NEXT: lui ra, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui s6, 61681 +; RV32IM-NEXT: lui t3, 209715 +; RV32IM-NEXT: lui a4, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli t5, a1, 24 +; RV32IM-NEXT: slli a5, a1, 24 +; RV32IM-NEXT: li t6, 1 +; RV32IM-NEXT: lui a7, 2 +; RV32IM-NEXT: lui a6, 4 +; RV32IM-NEXT: lui t2, 8 +; RV32IM-NEXT: lui s1, 32 +; RV32IM-NEXT: lui s0, 64 +; RV32IM-NEXT: lui s3, 128 +; RV32IM-NEXT: lui s4, 256 +; RV32IM-NEXT: lui s5, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s7, 2048 +; RV32IM-NEXT: lui s9, 4096 +; RV32IM-NEXT: lui s10, 8192 +; RV32IM-NEXT: lui s11, 16384 +; RV32IM-NEXT: addi s2, ra, -256 +; RV32IM-NEXT: sw s2, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t0, t0, s2 +; RV32IM-NEXT: or t1, t0, t1 +; RV32IM-NEXT: lui a3, 32768 +; RV32IM-NEXT: and t4, t4, s2 +; RV32IM-NEXT: or t5, t4, t5 +; RV32IM-NEXT: lui t0, 65536 +; RV32IM-NEXT: and a0, a0, s2 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a2, a2, a0 +; RV32IM-NEXT: lui t4, 131072 +; RV32IM-NEXT: and a1, a1, s2 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, a5, a1 +; RV32IM-NEXT: lui a5, 262144 +; RV32IM-NEXT: addi s2, s6, -241 +; RV32IM-NEXT: addi s6, t3, 819 +; RV32IM-NEXT: addi a4, a4, 1365 +; RV32IM-NEXT: or a2, a2, t1 +; RV32IM-NEXT: or a0, a0, t5 +; RV32IM-NEXT: srli t1, a2, 4 +; RV32IM-NEXT: and a2, a2, s2 +; RV32IM-NEXT: srli t5, a0, 4 +; RV32IM-NEXT: and a0, a0, s2 +; RV32IM-NEXT: and t1, t1, s2 +; RV32IM-NEXT: slli a2, a2, 4 +; RV32IM-NEXT: and t5, t5, s2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a2, t1, a2 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t1, a2, 2 +; RV32IM-NEXT: and a2, a2, s6 +; RV32IM-NEXT: srli t5, a0, 2 +; RV32IM-NEXT: and a0, a0, s6 +; RV32IM-NEXT: and t1, t1, s6 +; RV32IM-NEXT: slli a2, a2, 2 +; RV32IM-NEXT: and t5, t5, s6 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a2, t1, a2 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t1, a2, 1 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: srli t5, a0, 1 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and t1, t1, a4 +; RV32IM-NEXT: and t5, t5, a4 +; RV32IM-NEXT: lui a1, 524288 +; RV32IM-NEXT: slli t6, t6, 11 +; RV32IM-NEXT: slli a2, a2, 1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a4, t1, a2 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: andi t3, a0, 2 +; RV32IM-NEXT: andi t5, a0, 1 +; RV32IM-NEXT: and t6, a0, t6 +; RV32IM-NEXT: lui a2, 1 +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a7 +; RV32IM-NEXT: sw a2, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a6 +; RV32IM-NEXT: sw a2, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t2, a0, t2 +; RV32IM-NEXT: and ra, a0, ra +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: and s3, a0, s3 +; RV32IM-NEXT: and a2, a0, s4 +; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s5 +; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s8 +; RV32IM-NEXT: sw a2, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s7 +; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s9 +; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s10 +; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s11 +; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a3 +; RV32IM-NEXT: sw a3, 36(sp) 
# 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, t0 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, t4 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a0, 4 +; RV32IM-NEXT: andi a2, a0, 8 +; RV32IM-NEXT: andi a3, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a6, a0, 64 +; RV32IM-NEXT: andi a7, a0, 128 +; RV32IM-NEXT: andi t0, a0, 256 +; RV32IM-NEXT: andi t1, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul s11, a4, t3 +; RV32IM-NEXT: mul s9, a4, t5 +; RV32IM-NEXT: mul s8, a4, a1 +; RV32IM-NEXT: mul s4, a4, a2 +; RV32IM-NEXT: mul s5, a4, a3 +; RV32IM-NEXT: mul s1, a4, a5 +; RV32IM-NEXT: mul a1, a4, a6 +; RV32IM-NEXT: sw a1, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a4, a7 +; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t5, a4, t0 +; RV32IM-NEXT: mul t3, a4, t1 +; RV32IM-NEXT: mul s10, a4, a0 +; RV32IM-NEXT: mul a0, a4, t6 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a4, a0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a4, a0 +; RV32IM-NEXT: mul t6, a4, t2 +; RV32IM-NEXT: mul s7, a4, ra +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a4, s0 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a3, a4, s3 +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a4, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a6, a4, a0 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t2, a4, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a4, a0 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a4, a0 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a4, a5 +; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a4, t0 +; RV32IM-NEXT: lw t4, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a4, t4 +; RV32IM-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a4, s0 +; RV32IM-NEXT: lw ra, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, a4, ra +; RV32IM-NEXT: sw ra, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a4, ra +; RV32IM-NEXT: xor s9, s9, s11 +; RV32IM-NEXT: xor s4, s8, s4 +; RV32IM-NEXT: xor s1, s5, s1 +; RV32IM-NEXT: xor t3, t5, t3 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, s9, s4 +; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s1, a3 +; RV32IM-NEXT: xor t1, t3, s10 +; RV32IM-NEXT: xor a7, a7, t6 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, t1, a3 +; RV32IM-NEXT: xor a5, a7, s7 +; RV32IM-NEXT: xor a2, a2, t2 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte 
Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a6 +; RV32IM-NEXT: lw a6, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s3 +; RV32IM-NEXT: xor a0, a0, t4 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s0 +; RV32IM-NEXT: xor a3, a1, a3 +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: lui a5, 5 +; RV32IM-NEXT: addi a5, a5, 1365 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a3, a2, a6 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, a6 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, s2 +; RV32IM-NEXT: and a1, a1, s2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, s6 +; RV32IM-NEXT: and a1, a1, s6 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: and a0, a0, a5 +; RV32IM-NEXT: and a1, a1, a5 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i16: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -480 +; RV64IM-NEXT: sd ra, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a7, a0, 8 +; RV64IM-NEXT: li s4, 255 +; RV64IM-NEXT: srli a4, a0, 40 +; RV64IM-NEXT: lui s10, 16 +; RV64IM-NEXT: srli t1, a0, 56 +; RV64IM-NEXT: srliw t4, a0, 24 +; RV64IM-NEXT: slli a5, a0, 56 +; RV64IM-NEXT: lui s3, 61681 +; RV64IM-NEXT: lui t5, 209715 +; RV64IM-NEXT: lui s6, 349525 +; RV64IM-NEXT: srli s9, a1, 24 +; RV64IM-NEXT: srli s0, a1, 8 +; RV64IM-NEXT: srli ra, a1, 40 +; RV64IM-NEXT: srli t2, a1, 56 +; RV64IM-NEXT: srliw s11, a1, 24 +; RV64IM-NEXT: slli a6, a1, 56 +; 
RV64IM-NEXT: li t0, 1 +; RV64IM-NEXT: lui s1, 128 +; RV64IM-NEXT: lui s2, 256 +; RV64IM-NEXT: lui t6, 4096 +; RV64IM-NEXT: lui s5, 8192 +; RV64IM-NEXT: lui s7, 4080 +; RV64IM-NEXT: and a2, a3, s7 +; RV64IM-NEXT: slli t3, s4, 24 +; RV64IM-NEXT: addi s8, s10, -256 +; RV64IM-NEXT: and a3, a7, t3 +; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a3, a0, s7 +; RV64IM-NEXT: slli t4, t4, 32 +; RV64IM-NEXT: addi s3, s3, -241 +; RV64IM-NEXT: addi s4, t5, 819 +; RV64IM-NEXT: addi s6, s6, 1365 +; RV64IM-NEXT: and a7, s9, s7 +; RV64IM-NEXT: and a4, a4, s8 +; RV64IM-NEXT: or a4, a4, t1 +; RV64IM-NEXT: and t1, a1, s7 +; RV64IM-NEXT: slli t5, s11, 32 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: or s9, a3, t4 +; RV64IM-NEXT: slli a3, s3, 32 +; RV64IM-NEXT: add s3, s3, a3 +; RV64IM-NEXT: slli a3, s4, 32 +; RV64IM-NEXT: add s4, s4, a3 +; RV64IM-NEXT: slli a3, s6, 32 +; RV64IM-NEXT: add s6, s6, a3 +; RV64IM-NEXT: slli t4, t0, 11 +; RV64IM-NEXT: and a3, s0, t3 +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli s11, t0, 32 +; RV64IM-NEXT: and a7, ra, s8 +; RV64IM-NEXT: or a7, a7, t2 +; RV64IM-NEXT: slli ra, t0, 33 +; RV64IM-NEXT: slli t1, t1, 24 +; RV64IM-NEXT: or t1, t1, t5 +; RV64IM-NEXT: slli s0, t0, 34 +; RV64IM-NEXT: or a2, a2, a4 +; RV64IM-NEXT: slli a4, t0, 35 +; RV64IM-NEXT: sd a4, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s8 +; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, a5, a0 +; RV64IM-NEXT: slli a4, t0, 36 +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli a7, t0, 37 +; RV64IM-NEXT: and a1, a1, s8 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a6, a1 +; RV64IM-NEXT: slli a6, t0, 38 +; RV64IM-NEXT: or a0, a0, s9 +; RV64IM-NEXT: or a1, a1, t1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a3 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s3 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s3 +; RV64IM-NEXT: and a2, a2, s3 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s3 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s4, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s6, a3, a1 +; RV64IM-NEXT: andi a1, s6, 2 +; RV64IM-NEXT: andi a2, s6, 1 +; RV64IM-NEXT: andi a3, s6, 4 +; RV64IM-NEXT: andi a4, s6, 8 +; RV64IM-NEXT: andi a5, s6, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a4 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s6, 256 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; 
RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s3, t0, 39 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 40 +; RV64IM-NEXT: and a2, s6, s1 +; RV64IM-NEXT: and a3, s6, s2 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 41 +; RV64IM-NEXT: and a3, s6, t6 +; RV64IM-NEXT: and a4, s6, s5 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t0, 48 +; RV64IM-NEXT: and a4, s6, s11 +; RV64IM-NEXT: and a5, s6, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t0, 49 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 56 +; RV64IM-NEXT: and a2, s6, a3 +; RV64IM-NEXT: and a3, s6, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 57 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 42 +; RV64IM-NEXT: slli ra, t0, 43 +; RV64IM-NEXT: slli a4, t0, 44 +; RV64IM-NEXT: slli t6, t0, 45 +; RV64IM-NEXT: slli s1, t0, 46 +; RV64IM-NEXT: slli s2, t0, 47 +; RV64IM-NEXT: slli s4, t0, 50 +; RV64IM-NEXT: slli s5, t0, 51 +; RV64IM-NEXT: slli a1, t0, 52 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 53 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 54 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 55 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 58 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 59 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 60 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 61 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t0, t0, 62 +; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t4 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 32 +; RV64IM-NEXT: and a1, s6, s9 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s6, s11 +; 
RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 1024 +; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 2048 +; RV64IM-NEXT: and a1, s6, s7 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t1, 16384 +; RV64IM-NEXT: and a1, s6, t1 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t2, 32768 +; RV64IM-NEXT: and t2, s6, t2 +; RV64IM-NEXT: lui t3, 65536 +; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t4, 131072 +; RV64IM-NEXT: and a5, s6, t4 +; RV64IM-NEXT: lui t5, 262144 +; RV64IM-NEXT: and t0, s6, t5 +; RV64IM-NEXT: and s11, s6, s0 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a6 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a2 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s6, ra +; RV64IM-NEXT: and a1, s6, a4 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t6 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s2 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s4 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, s6, s5 +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, s6, a1 +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, s6, a1 +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, s6, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s6, a1 +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, s6, a1 +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s6, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s6, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s6, a1 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s6, a1 +; RV64IM-NEXT: andi a1, s6, 64 +; RV64IM-NEXT: andi a2, s6, 128 +; RV64IM-NEXT: andi a3, s6, 1024 +; RV64IM-NEXT: srliw a4, s6, 31 +; RV64IM-NEXT: srli s6, s6, 63 +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t3, a0, a3 +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: mul t5, a0, t2 +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a5 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t0 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a3, a0, s11 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: mul a5, a0, ra +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a1 +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul s0, a0, s0 +; RV64IM-NEXT: mul s1, a0, s1 +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul s3, a0, s3 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli s6, s6, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, s6 +; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, a0 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, a0, t3 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, a0, t1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a0, a7 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a0, a6 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a0, a3 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte 
Folded Reload +; RV64IM-NEXT: xor s5, a0, s5 +; RV64IM-NEXT: xor t4, s6, t4 +; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: xor a7, a7, t6 +; RV64IM-NEXT: xor a6, a6, t5 +; RV64IM-NEXT: xor a3, a3, t0 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a1, a1, s0 +; RV64IM-NEXT: xor a5, s5, s7 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, t2 +; RV64IM-NEXT: xor a1, a1, s1 +; RV64IM-NEXT: xor a5, a5, s8 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: xor a5, a5, s9 +; RV64IM-NEXT: xor t2, t0, t3 +; RV64IM-NEXT: xor t1, t2, t1 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, ra +; RV64IM-NEXT: xor a1, a1, s3 +; RV64IM-NEXT: xor a5, a5, s10 +; RV64IM-NEXT: xor a7, t1, a7 +; RV64IM-NEXT: xor a4, a6, a4 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a6, 5 +; RV64IM-NEXT: addi a6, a6, 1365 +; RV64IM-NEXT: slli t0, t0, 56 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a5, a0 +; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a5, a7, t1 +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: slli a5, a5, 40 +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: or a4, t0, a5 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a5, a3, t0 +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: srli a3, a3, 8 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a7 +; RV64IM-NEXT: srli a2, a2, 24 +; RV64IM-NEXT: srliw a7, a1, 24 +; RV64IM-NEXT: and a2, a2, t0 +; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a7, a7, 32 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a3, a5, a7 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a4, a3 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: and a0, a0, a6 +; 
RV64IM-NEXT: and a1, a1, a6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 480 +; RV64IM-NEXT: ret + %a.ext = zext i16 %a to i32 + %b.ext = zext i16 %b to i32 + %clmul = call i32 @llvm.clmul.i32(i32 %a.ext, i32 %b.ext) + %res.ext = lshr i32 %clmul, 15 + %res = trunc i32 %res.ext to i16 + ret i16 %res +} + +define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { +; RV32IM-LABEL: clmulr_i32: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui a3, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui t3, 61681 +; RV32IM-NEXT: lui t5, 209715 +; RV32IM-NEXT: lui t6, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli a4, a1, 24 +; RV32IM-NEXT: slli a5, a1, 24 +; RV32IM-NEXT: li s7, 1 +; RV32IM-NEXT: lui t2, 4 +; RV32IM-NEXT: lui s0, 8 +; RV32IM-NEXT: lui s1, 32 +; RV32IM-NEXT: lui s2, 64 +; RV32IM-NEXT: lui s3, 128 +; RV32IM-NEXT: lui s4, 256 +; RV32IM-NEXT: lui s8, 512 +; RV32IM-NEXT: lui a7, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: lui s11, 8192 +; RV32IM-NEXT: lui ra, 16384 +; RV32IM-NEXT: addi s5, a3, -256 +; RV32IM-NEXT: and t0, t0, s5 +; RV32IM-NEXT: or t1, t0, t1 +; RV32IM-NEXT: lui a6, 32768 +; RV32IM-NEXT: and t4, t4, s5 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: lui t0, 65536 +; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: lui a2, 131072 +; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or t4, a5, a1 +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: or a0, a0, t1 +; RV32IM-NEXT: lui a5, 524288 +; RV32IM-NEXT: addi t3, t3, -241 +; RV32IM-NEXT: addi t5, t5, 819 +; RV32IM-NEXT: addi t6, t6, 1365 +; RV32IM-NEXT: slli s7, s7, 11 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: srli t4, a0, 4 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, t4, a0 +; RV32IM-NEXT: srli t4, a4, 4 +; RV32IM-NEXT: and a4, a4, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a4, a4, 4 +; RV32IM-NEXT: or a4, t4, a4 +; 
RV32IM-NEXT: srli t4, a0, 2 +; RV32IM-NEXT: and a0, a0, t5 +; RV32IM-NEXT: and t4, t4, t5 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, t4, a0 +; RV32IM-NEXT: srli t4, a4, 2 +; RV32IM-NEXT: and a4, a4, t5 +; RV32IM-NEXT: and t4, t4, t5 +; RV32IM-NEXT: slli a4, a4, 2 +; RV32IM-NEXT: or t4, t4, a4 +; RV32IM-NEXT: srli a4, a0, 1 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: and a4, a4, t6 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a4, a4, a0 +; RV32IM-NEXT: srli a0, t4, 1 +; RV32IM-NEXT: and t4, t4, t6 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: slli t4, t4, 1 +; RV32IM-NEXT: or a0, a0, t4 +; RV32IM-NEXT: andi t4, a0, 2 +; RV32IM-NEXT: and s6, a0, s7 +; RV32IM-NEXT: lui t1, 1 +; RV32IM-NEXT: and t1, a0, t1 +; RV32IM-NEXT: sw t1, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui t1, 2 +; RV32IM-NEXT: and t1, a0, t1 +; RV32IM-NEXT: sw t1, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t1, a0, t2 +; RV32IM-NEXT: sw t1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: and a3, a0, a3 +; RV32IM-NEXT: sw a3, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s2 +; RV32IM-NEXT: sw a3, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s3, a0, s3 +; RV32IM-NEXT: and a3, a0, s4 +; RV32IM-NEXT: sw a3, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s8 +; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a7 +; RV32IM-NEXT: sw a3, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s9, a0, s9 +; RV32IM-NEXT: and a3, a0, s10 +; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s11 +; RV32IM-NEXT: sw a3, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, ra +; RV32IM-NEXT: sw a3, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a6 +; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, t0 +; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a0, 1 +; RV32IM-NEXT: andi a2, a0, 4 +; RV32IM-NEXT: andi a3, a0, 8 +; RV32IM-NEXT: andi a5, a0, 16 +; RV32IM-NEXT: andi a6, a0, 32 +; RV32IM-NEXT: andi a7, a0, 64 +; RV32IM-NEXT: andi t0, a0, 128 +; RV32IM-NEXT: andi t1, a0, 256 +; RV32IM-NEXT: andi t2, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul t4, a4, t4 +; RV32IM-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul ra, a4, a1 +; RV32IM-NEXT: mul s11, a4, a2 +; RV32IM-NEXT: mul s8, a4, a3 +; RV32IM-NEXT: mul s7, a4, a5 +; RV32IM-NEXT: mul s4, a4, a6 +; RV32IM-NEXT: mul a1, a4, a7 +; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a4, t0 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s2, a4, t1 +; RV32IM-NEXT: mul t2, a4, t2 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a4, s6 +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a4, a0 +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a4, a0 +; RV32IM-NEXT: mul s1, a4, s0 +; RV32IM-NEXT: lw a0, 72(sp) # 
4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a3, a4, s3 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a4, a0 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a6, a4, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a4, a0 +; RV32IM-NEXT: mul s6, a4, s9 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a4, a0 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a4, a5 +; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a4, t0 +; RV32IM-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a4, s0 +; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a4, s3 +; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s9, a4, s9 +; RV32IM-NEXT: lw s10, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a4, s10 +; RV32IM-NEXT: lw s10, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor ra, ra, s10 +; RV32IM-NEXT: xor s8, s11, s8 +; RV32IM-NEXT: xor s4, s7, s4 +; RV32IM-NEXT: xor t2, s2, t2 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, ra, s8 +; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s4, a3 +; RV32IM-NEXT: lw t1, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t2, t1 +; RV32IM-NEXT: xor a7, a7, s1 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, t1, a3 +; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a7, a5 +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a6 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s6 +; RV32IM-NEXT: xor a0, a0, s0 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s3 +; RV32IM-NEXT: xor a3, a1, a3 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: xor a0, a0, s9 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: and a3, a2, s5 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s5 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and a1, a1, t3 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, t5 +; RV32IM-NEXT: and a1, a1, t5 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: and a1, a1, t6 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; 
RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulr_i32: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -512 +; RV64IM-NEXT: sd ra, 504(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 496(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 488(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 480(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a7, a0, 8 +; RV64IM-NEXT: li s1, 255 +; RV64IM-NEXT: srli a6, a0, 40 +; RV64IM-NEXT: lui a4, 16 +; RV64IM-NEXT: srli t2, a0, 56 +; RV64IM-NEXT: srliw t3, a0, 24 +; RV64IM-NEXT: slli a2, a0, 56 +; RV64IM-NEXT: lui t4, 61681 +; RV64IM-NEXT: lui t6, 209715 +; RV64IM-NEXT: lui s9, 349525 +; RV64IM-NEXT: srli s7, a1, 24 +; RV64IM-NEXT: srli s5, a1, 8 +; RV64IM-NEXT: srli t5, a1, 40 +; RV64IM-NEXT: srli t0, a1, 56 +; RV64IM-NEXT: srliw ra, a1, 24 +; RV64IM-NEXT: slli a5, a1, 56 +; RV64IM-NEXT: li t1, 1 +; RV64IM-NEXT: lui s10, 128 +; RV64IM-NEXT: lui s2, 256 +; RV64IM-NEXT: lui s3, 4096 +; RV64IM-NEXT: lui s0, 8192 +; RV64IM-NEXT: lui s8, 4080 +; RV64IM-NEXT: and a3, a3, s8 +; RV64IM-NEXT: slli s1, s1, 24 +; RV64IM-NEXT: addi s11, a4, -256 +; RV64IM-NEXT: and a7, a7, s1 +; RV64IM-NEXT: sd s1, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a3, a7, a3 +; RV64IM-NEXT: and a7, a0, s8 +; RV64IM-NEXT: slli t3, t3, 32 +; RV64IM-NEXT: addi s4, t4, -241 +; RV64IM-NEXT: addi s6, t6, 819 +; RV64IM-NEXT: addi a4, s9, 1365 +; RV64IM-NEXT: and t4, s7, s8 +; RV64IM-NEXT: and a6, a6, s11 +; RV64IM-NEXT: or a6, a6, t2 +; RV64IM-NEXT: and t2, a1, s8 +; RV64IM-NEXT: slli t6, ra, 32 +; RV64IM-NEXT: slli a7, a7, 24 +; RV64IM-NEXT: or s9, a7, t3 +; RV64IM-NEXT: slli a7, s4, 32 +; RV64IM-NEXT: add s4, s4, a7 +; RV64IM-NEXT: slli a7, s6, 32 +; RV64IM-NEXT: add s6, s6, a7 +; RV64IM-NEXT: slli s7, t1, 11 +; RV64IM-NEXT: and a7, s5, s1 +; RV64IM-NEXT: or a7, a7, t4 +; RV64IM-NEXT: slli t4, t1, 32 +; RV64IM-NEXT: and t3, t5, s11 +; RV64IM-NEXT: or t0, t3, t0 +; RV64IM-NEXT: slli ra, t1, 33 +; RV64IM-NEXT: slli t2, t2, 24 +; RV64IM-NEXT: or t2, t2, t6 +; RV64IM-NEXT: slli s1, t1, 34 +; RV64IM-NEXT: or a3, a3, a6 +; RV64IM-NEXT: slli a6, t1, 35 +; RV64IM-NEXT: sd a6, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s11 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: slli a2, t1, 
36 +; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a7, t0 +; RV64IM-NEXT: slli a7, t1, 37 +; RV64IM-NEXT: and a1, a1, s11 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a5, a1 +; RV64IM-NEXT: sd a4, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a5, a4, 32 +; RV64IM-NEXT: add a5, a4, a5 +; RV64IM-NEXT: or a0, a0, s9 +; RV64IM-NEXT: or a1, a1, t2 +; RV64IM-NEXT: or a0, a0, a3 +; RV64IM-NEXT: or a1, a1, a2 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s4, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s6, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: and a2, a2, a5 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, a5 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s5, a3, a1 +; RV64IM-NEXT: andi a1, s5, 2 +; RV64IM-NEXT: andi a2, s5, 1 +; RV64IM-NEXT: andi a3, s5, 4 +; RV64IM-NEXT: andi a5, s5, 8 +; RV64IM-NEXT: andi a6, s5, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s5, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s5, 256 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s5, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, t1, 38 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 40 +; RV64IM-NEXT: and a2, s5, s10 +; RV64IM-NEXT: and a3, s5, s2 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 41 +; RV64IM-NEXT: and a3, s5, s3 +; RV64IM-NEXT: and a4, s5, s0 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t1, 48 +; RV64IM-NEXT: and a4, s5, t4 +; RV64IM-NEXT: and a5, s5, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t1, 49 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 280(sp) # 
8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 56 +; RV64IM-NEXT: and a2, s5, a3 +; RV64IM-NEXT: and a3, s5, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 57 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli ra, t1, 39 +; RV64IM-NEXT: slli a2, t1, 42 +; RV64IM-NEXT: slli a4, t1, 43 +; RV64IM-NEXT: slli s0, t1, 44 +; RV64IM-NEXT: slli s2, t1, 45 +; RV64IM-NEXT: slli s3, t1, 46 +; RV64IM-NEXT: slli s6, t1, 47 +; RV64IM-NEXT: slli a1, t1, 50 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 51 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 52 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 53 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 54 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 55 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 58 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 59 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 60 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 61 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, t1, 62 +; RV64IM-NEXT: sd t1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, s5, s7 +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s5, a3 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s5, a3 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a1, 16 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 32 +; RV64IM-NEXT: and a1, s5, s9 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s5, s11 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s5, s10 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 1024 +; RV64IM-NEXT: and a1, s5, s8 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t0, 2048 +; RV64IM-NEXT: and t0, s5, t0 +; RV64IM-NEXT: lui t2, 16384 +; RV64IM-NEXT: and t2, s5, t2 +; RV64IM-NEXT: lui t3, 32768 +; RV64IM-NEXT: and a1, s5, t3 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t4, 65536 +; RV64IM-NEXT: and a1, s5, t4 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t5, 131072 +; RV64IM-NEXT: and a5, s5, t5 +; RV64IM-NEXT: lui t6, 262144 +; RV64IM-NEXT: and a6, s5, t6 +; RV64IM-NEXT: and s11, s5, s1 +; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t5, s5, a1 +; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t6, s5, a1 +; RV64IM-NEXT: and a1, s5, a7 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s4 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s5, ra +; RV64IM-NEXT: and a1, s5, a2 +; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a4 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s0 +; 
RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s2 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s3 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s6 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s5, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, s5, a1 +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s5, a1 +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s5, a1 +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s5, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s5, a1 +; RV64IM-NEXT: andi a1, s5, 64 +; RV64IM-NEXT: andi a2, s5, 128 +; RV64IM-NEXT: andi a3, s5, 1024 +; RV64IM-NEXT: srliw a4, s5, 31 +; RV64IM-NEXT: srli t3, s5, 63 +; RV64IM-NEXT: mul s2, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s0, a0, a3 +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s3, a0, a1 +; RV64IM-NEXT: mul a1, a0, t0 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a7, a0, t2 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s1, a0, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a5 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a6 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a5, a0, s11 +; RV64IM-NEXT: mul t2, a0, t5 +; RV64IM-NEXT: mul s11, a0, t6 +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; 
RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, ra +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s5, a0, a1 +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: ld a3, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: ld t0, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, t0 +; RV64IM-NEXT: ld t6, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, t6 +; RV64IM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, ra +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: sd s4, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli t3, t3, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, t3 +; RV64IM-NEXT: ld t3, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, s4 +; RV64IM-NEXT: ld s4, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, s4, s2 +; RV64IM-NEXT: ld s4, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s4, s0 +; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, s4, t4 +; RV64IM-NEXT: ld s4, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s4, t1 +; RV64IM-NEXT: ld s4, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, s4, a7 +; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s4, a5 +; RV64IM-NEXT: ld s4, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, s4, a2 +; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, s4, a1 +; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s4, s6 +; RV64IM-NEXT: xor t3, t3, s2 +; RV64IM-NEXT: ld s2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, s2 +; RV64IM-NEXT: ld s2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, t4, s2 +; RV64IM-NEXT: xor t1, t1, s3 +; RV64IM-NEXT: xor a7, a7, s1 +; RV64IM-NEXT: xor a5, a5, t2 +; RV64IM-NEXT: xor a2, a2, a6 +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: xor a3, s6, s7 +; RV64IM-NEXT: ld a6, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, t3, a6 +; RV64IM-NEXT: ld t2, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s0, t2 +; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t4, t3 +; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t4 +; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: xor a5, a5, s11 +; RV64IM-NEXT: xor a2, a2, t5 +; RV64IM-NEXT: xor a1, a1, t0 +; RV64IM-NEXT: xor a3, a3, s8 +; RV64IM-NEXT: ld t0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t3, t0 +; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t3 +; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded 
Reload +; RV64IM-NEXT: xor a5, a5, t3 +; RV64IM-NEXT: xor a2, a2, s5 +; RV64IM-NEXT: xor a1, a1, t6 +; RV64IM-NEXT: xor a3, a3, s9 +; RV64IM-NEXT: xor t2, a6, t2 +; RV64IM-NEXT: xor t0, t2, t0 +; RV64IM-NEXT: ld t2, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t2 +; RV64IM-NEXT: ld t2, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t2 +; RV64IM-NEXT: xor a1, a1, ra +; RV64IM-NEXT: xor a3, a3, s10 +; RV64IM-NEXT: xor t0, t0, t1 +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: ld a7, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a7 +; RV64IM-NEXT: ld a7, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a7 +; RV64IM-NEXT: slli a6, a6, 56 +; RV64IM-NEXT: ld a7, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a7 +; RV64IM-NEXT: xor a0, a3, a0 +; RV64IM-NEXT: ld t1, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, t0, t1 +; RV64IM-NEXT: xor a4, t0, a4 +; RV64IM-NEXT: slli a3, a3, 40 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: or a3, a6, a3 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a5, a4, a7 +; RV64IM-NEXT: xor a2, a4, a2 +; RV64IM-NEXT: srli a4, a4, 8 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: ld a6, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a4, a4, a6 +; RV64IM-NEXT: srli a2, a2, 24 +; RV64IM-NEXT: srliw a6, a1, 24 +; RV64IM-NEXT: and a2, a2, a7 +; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a6, a6, 32 +; RV64IM-NEXT: or a2, a4, a2 +; RV64IM-NEXT: and a1, a7, t1 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a4, a5, a6 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a3, a4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: ld ra, 504(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 496(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 488(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 480(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 512 +; RV64IM-NEXT: ret + %a.ext = zext i32 %a to i64 + %b.ext = zext i32 %b to i64 + %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext) + %res.ext = lshr i64 %clmul, 31 + %res = trunc i64 %res.ext to i32 + ret i32 %res +} + +define i4 @clmulr_constfold_i4() nounwind { +; CHECK-LABEL: clmulr_constfold_i4: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret + %clmul = call 
i8 @llvm.clmul.i8(i8 1, i8 2) + %res.ext = lshr i8 %clmul, 3 + %res = trunc i8 %res.ext to i4 + ret i4 %res +} + +define i16 @clmulr_constfold_i16() nounwind { +; CHECK-LABEL: clmulr_constfold_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: addi a0, a0, 1365 +; CHECK-NEXT: ret + %clmul = call i32 @llvm.clmul.i16(i32 -2, i32 -1) + %res.ext = lshr i32 %clmul, 15 + %res = trunc i32 %res.ext to i16 + ret i16 %res +} + +define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { +; RV32IM-LABEL: clmulh_i4: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui a3, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui t2, 61681 +; RV32IM-NEXT: lui t3, 209715 +; RV32IM-NEXT: lui a7, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli t5, a1, 24 +; RV32IM-NEXT: slli a4, a1, 24 +; RV32IM-NEXT: li t6, 1 +; RV32IM-NEXT: lui s0, 4 +; RV32IM-NEXT: lui s1, 8 +; RV32IM-NEXT: lui s2, 32 +; RV32IM-NEXT: lui s3, 64 +; RV32IM-NEXT: lui s5, 128 +; RV32IM-NEXT: lui s6, 256 +; RV32IM-NEXT: lui s7, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: lui s11, 8192 +; RV32IM-NEXT: lui ra, 16384 +; RV32IM-NEXT: addi s4, a3, -256 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: and t0, t0, s4 +; RV32IM-NEXT: or a3, t0, t1 +; RV32IM-NEXT: lui t0, 32768 +; RV32IM-NEXT: and t1, t4, s4 +; RV32IM-NEXT: or t4, t1, t5 +; RV32IM-NEXT: lui a6, 65536 +; RV32IM-NEXT: and a0, a0, s4 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or t5, a2, a0 +; RV32IM-NEXT: lui a2, 131072 +; RV32IM-NEXT: and a1, a1, s4 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, a4, a1 +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: addi t2, t2, -241 +; RV32IM-NEXT: addi t3, t3, 819 +; RV32IM-NEXT: addi a7, a7, 1365 +; RV32IM-NEXT: or a3, t5, a3 +; RV32IM-NEXT: or a0, a0, t4 +; RV32IM-NEXT: srli t4, a3, 4 +; RV32IM-NEXT: and a3, a3, t2 +; RV32IM-NEXT: srli t5, a0, 4 +; RV32IM-NEXT: and a0, a0, t2 +; RV32IM-NEXT: and t4, t4, t2 +; RV32IM-NEXT: slli a3, a3, 4 +; RV32IM-NEXT: and t5, t5, t2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t4, a3, 2 +; RV32IM-NEXT: and a3, a3, t3 +; RV32IM-NEXT: srli t5, a0, 2 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a3, a3, 2 +; RV32IM-NEXT: and t5, t5, t3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t4, a3, 1 +; RV32IM-NEXT: and a3, a3, a7 +; RV32IM-NEXT: srli t5, a0, 1 +; RV32IM-NEXT: and a0, a0, a7 +; RV32IM-NEXT: and t4, t4, a7 +; RV32IM-NEXT: and a7, t5, a7 +; RV32IM-NEXT: lui a4, 524288 +; RV32IM-NEXT: slli t6, t6, 11 +; RV32IM-NEXT: slli a3, a3, 1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, 
a7, a0 +; RV32IM-NEXT: andi t5, a0, 2 +; RV32IM-NEXT: andi t4, a0, 1 +; RV32IM-NEXT: and t6, a0, t6 +; RV32IM-NEXT: lui a7, 1 +; RV32IM-NEXT: and a7, a0, a7 +; RV32IM-NEXT: sw a7, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a7, 2 +; RV32IM-NEXT: and a7, a0, a7 +; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: sw s0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s2, a0, s2 +; RV32IM-NEXT: and a5, a0, s3 +; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s5 +; RV32IM-NEXT: sw a5, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s6 +; RV32IM-NEXT: sw a5, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s7, a0, s7 +; RV32IM-NEXT: and s8, a0, s8 +; RV32IM-NEXT: and a5, a0, s9 +; RV32IM-NEXT: sw a5, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s10 +; RV32IM-NEXT: sw a5, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s11 +; RV32IM-NEXT: sw a5, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, ra +; RV32IM-NEXT: sw a5, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, t0 +; RV32IM-NEXT: sw a5, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a6 +; RV32IM-NEXT: sw a5, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, a4 +; RV32IM-NEXT: sw a4, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a0, 4 +; RV32IM-NEXT: andi a2, a0, 8 +; RV32IM-NEXT: andi a4, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a6, a0, 64 +; RV32IM-NEXT: andi a7, a0, 128 +; RV32IM-NEXT: andi t0, a0, 256 +; RV32IM-NEXT: andi t1, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul t5, a3, t5 +; RV32IM-NEXT: sw t5, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, a3, t4 +; RV32IM-NEXT: mul a1, a3, a1 +; RV32IM-NEXT: sw a1, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s6, a3, a2 +; RV32IM-NEXT: mul s5, a3, a4 +; RV32IM-NEXT: mul s3, a3, a5 +; RV32IM-NEXT: mul a1, a3, a6 +; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a3, a7 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s0, a3, t0 +; RV32IM-NEXT: mul t5, a3, t1 +; RV32IM-NEXT: mul s11, a3, a0 +; RV32IM-NEXT: mul a0, a3, t6 +; RV32IM-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a3, a0 +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a3, a0 +; RV32IM-NEXT: mul s1, a3, s1 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, a3, a0 +; RV32IM-NEXT: mul a0, a3, s2 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a3, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a3, a0 +; RV32IM-NEXT: mul a6, a3, s7 +; RV32IM-NEXT: mul t4, a3, s8 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a3, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul 
a1, a3, a0 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: lw a5, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a3, a5 +; RV32IM-NEXT: lw t0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a3, t0 +; RV32IM-NEXT: lw t6, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a3, t6 +; RV32IM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a3, s2 +; RV32IM-NEXT: lw s8, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s8, a3, s8 +; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a3, s9 +; RV32IM-NEXT: lw s9, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s10, s10, s9 +; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s6, s9, s6 +; RV32IM-NEXT: xor s3, s5, s3 +; RV32IM-NEXT: xor t5, s0, t5 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, s10, s6 +; RV32IM-NEXT: lw a4, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, s3, a4 +; RV32IM-NEXT: xor t1, t5, s11 +; RV32IM-NEXT: xor a7, a7, s1 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, a1, a4 +; RV32IM-NEXT: lw a4, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, t1, a4 +; RV32IM-NEXT: xor a5, a7, ra +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a6 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s7 +; RV32IM-NEXT: xor a0, a0, t6 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s2 +; RV32IM-NEXT: xor a4, a1, a4 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a0, a0, s8 +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a0, a0, a3 +; RV32IM-NEXT: and a3, a2, s4 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s4 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, t2 +; RV32IM-NEXT: and a1, a1, t2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and a1, a1, t3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: andi a1, a0, 5 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: slli a1, a1, 1 +; RV32IM-NEXT: andi a0, a0, 20 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulh_i4: +; 
RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -480 +; RV64IM-NEXT: sd ra, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a7, a0, 8 +; RV64IM-NEXT: li s4, 255 +; RV64IM-NEXT: srli a4, a0, 40 +; RV64IM-NEXT: lui s10, 16 +; RV64IM-NEXT: srli t1, a0, 56 +; RV64IM-NEXT: srliw t4, a0, 24 +; RV64IM-NEXT: slli a5, a0, 56 +; RV64IM-NEXT: lui s3, 61681 +; RV64IM-NEXT: lui t5, 209715 +; RV64IM-NEXT: lui s6, 349525 +; RV64IM-NEXT: srli s9, a1, 24 +; RV64IM-NEXT: srli s0, a1, 8 +; RV64IM-NEXT: srli ra, a1, 40 +; RV64IM-NEXT: srli t2, a1, 56 +; RV64IM-NEXT: srliw s11, a1, 24 +; RV64IM-NEXT: slli a6, a1, 56 +; RV64IM-NEXT: li t0, 1 +; RV64IM-NEXT: lui s1, 128 +; RV64IM-NEXT: lui s2, 256 +; RV64IM-NEXT: lui t6, 4096 +; RV64IM-NEXT: lui s5, 8192 +; RV64IM-NEXT: lui s7, 4080 +; RV64IM-NEXT: and a2, a3, s7 +; RV64IM-NEXT: slli t3, s4, 24 +; RV64IM-NEXT: addi s8, s10, -256 +; RV64IM-NEXT: and a3, a7, t3 +; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a3, a0, s7 +; RV64IM-NEXT: slli t4, t4, 32 +; RV64IM-NEXT: addi s3, s3, -241 +; RV64IM-NEXT: addi s4, t5, 819 +; RV64IM-NEXT: addi s6, s6, 1365 +; RV64IM-NEXT: and a7, s9, s7 +; RV64IM-NEXT: and a4, a4, s8 +; RV64IM-NEXT: or a4, a4, t1 +; RV64IM-NEXT: and t1, a1, s7 +; RV64IM-NEXT: slli t5, s11, 32 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: or s9, a3, t4 +; RV64IM-NEXT: slli a3, s3, 32 +; RV64IM-NEXT: add s3, s3, a3 +; RV64IM-NEXT: slli a3, s4, 32 +; RV64IM-NEXT: add s4, s4, a3 +; RV64IM-NEXT: slli a3, s6, 32 +; RV64IM-NEXT: add s6, s6, a3 +; RV64IM-NEXT: slli t4, t0, 11 +; RV64IM-NEXT: and a3, s0, t3 +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli s11, t0, 32 +; RV64IM-NEXT: and a7, ra, s8 +; RV64IM-NEXT: or a7, a7, t2 +; RV64IM-NEXT: slli ra, t0, 33 +; RV64IM-NEXT: slli t1, t1, 24 +; RV64IM-NEXT: or t1, t1, t5 +; RV64IM-NEXT: slli s0, t0, 34 +; RV64IM-NEXT: or a2, a2, a4 +; RV64IM-NEXT: slli a4, t0, 35 +; RV64IM-NEXT: sd a4, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s8 +; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, a5, a0 +; RV64IM-NEXT: slli a4, t0, 36 +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli a7, t0, 37 +; RV64IM-NEXT: and a1, a1, s8 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a6, a1 +; RV64IM-NEXT: slli a6, t0, 38 +; RV64IM-NEXT: or a0, a0, s9 +; RV64IM-NEXT: or a1, a1, t1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a3 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s3 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s3 +; RV64IM-NEXT: and a2, a2, s3 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s3 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; 
RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s4, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s6, a3, a1 +; RV64IM-NEXT: andi a1, s6, 2 +; RV64IM-NEXT: andi a2, s6, 1 +; RV64IM-NEXT: andi a3, s6, 4 +; RV64IM-NEXT: andi a4, s6, 8 +; RV64IM-NEXT: andi a5, s6, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a4 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s6, 256 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s3, t0, 39 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 40 +; RV64IM-NEXT: and a2, s6, s1 +; RV64IM-NEXT: and a3, s6, s2 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 41 +; RV64IM-NEXT: and a3, s6, t6 +; RV64IM-NEXT: and a4, s6, s5 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t0, 48 +; RV64IM-NEXT: and a4, s6, s11 +; RV64IM-NEXT: and a5, s6, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t0, 49 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 56 +; RV64IM-NEXT: and a2, s6, a3 +; RV64IM-NEXT: and a3, s6, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 57 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 42 +; RV64IM-NEXT: slli ra, t0, 43 +; RV64IM-NEXT: slli a4, t0, 44 +; RV64IM-NEXT: slli t6, t0, 45 +; RV64IM-NEXT: slli s1, t0, 46 +; RV64IM-NEXT: slli s2, t0, 47 +; RV64IM-NEXT: slli s4, t0, 50 +; RV64IM-NEXT: slli s5, t0, 51 +; RV64IM-NEXT: slli a1, t0, 52 +; RV64IM-NEXT: sd a1, 
232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 53 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 54 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 55 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 58 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 59 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 60 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 61 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t0, t0, 62 +; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t4 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 32 +; RV64IM-NEXT: and a1, s6, s9 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s6, s11 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 1024 +; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 2048 +; RV64IM-NEXT: and a1, s6, s7 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t1, 16384 +; RV64IM-NEXT: and a1, s6, t1 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t2, 32768 +; RV64IM-NEXT: and t2, s6, t2 +; RV64IM-NEXT: lui t3, 65536 +; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t4, 131072 +; RV64IM-NEXT: and a5, s6, t4 +; RV64IM-NEXT: lui t5, 262144 +; RV64IM-NEXT: and t0, s6, t5 +; RV64IM-NEXT: and s11, s6, s0 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a6 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a2 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s6, ra +; RV64IM-NEXT: and a1, s6, a4 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t6 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s2 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s4 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, s6, s5 +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, s6, a1 +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, s6, a1 +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, s6, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s6, a1 +; RV64IM-NEXT: ld a1, 
200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, s6, a1 +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s6, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s6, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s6, a1 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s6, a1 +; RV64IM-NEXT: andi a1, s6, 64 +; RV64IM-NEXT: andi a2, s6, 128 +; RV64IM-NEXT: andi a3, s6, 1024 +; RV64IM-NEXT: srliw a4, s6, 31 +; RV64IM-NEXT: srli s6, s6, 63 +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t3, a0, a3 +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: mul t5, a0, t2 +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a5 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t0 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a3, a0, s11 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: mul a5, a0, ra +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a1 +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul s0, a0, s0 +; RV64IM-NEXT: mul s1, 
a0, s1 +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul s3, a0, s3 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli s6, s6, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, s6 +; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, a0 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, a0, t3 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, a0, t1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a0, a7 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a0, a6 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a0, a3 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, a0, s5 +; RV64IM-NEXT: xor t4, s6, t4 +; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: xor a7, a7, t6 +; RV64IM-NEXT: xor a6, a6, t5 +; RV64IM-NEXT: xor a3, a3, t0 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a1, a1, s0 +; RV64IM-NEXT: xor a5, s5, s7 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, t2 +; RV64IM-NEXT: xor a1, a1, s1 +; RV64IM-NEXT: xor a5, a5, s8 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: xor a5, a5, s9 +; RV64IM-NEXT: xor t2, t0, t3 +; RV64IM-NEXT: xor t1, t2, t1 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, ra +; RV64IM-NEXT: xor a1, a1, s3 +; RV64IM-NEXT: xor a5, a5, s10 +; RV64IM-NEXT: xor a7, t1, a7 +; RV64IM-NEXT: xor a4, a6, a4 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a6, %hi(.LCPI14_0) +; RV64IM-NEXT: ld a6, %lo(.LCPI14_0)(a6) +; RV64IM-NEXT: slli t0, t0, 56 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a5, a0 +; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a5, a7, t1 +; RV64IM-NEXT: xor a4, a7, a4 +; 
RV64IM-NEXT: slli a5, a5, 40 +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: or a4, t0, a5 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a5, a3, t0 +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: srli a3, a3, 8 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a7 +; RV64IM-NEXT: srli a2, a2, 24 +; RV64IM-NEXT: srliw a7, a1, 24 +; RV64IM-NEXT: and a2, a2, t0 +; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a7, a7, 32 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a3, a5, a7 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a4, a3 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: andi a0, a0, 5 +; RV64IM-NEXT: and a1, a1, a6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 59 +; RV64IM-NEXT: srli a0, a0, 60 +; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 480 +; RV64IM-NEXT: ret + %a.ext = zext i4 %a to i8 + %b.ext = zext i4 %b to i8 + %clmul = call i8 @llvm.clmul.i8(i8 %a.ext, i8 %b.ext) + %res.ext = lshr i8 %clmul, 4 + %res = trunc i8 %res.ext to i4 + ret i4 %res +} + +define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { +; RV32IM-LABEL: clmulh_i4_bitreverse: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui a3, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui s1, 61681 +; RV32IM-NEXT: lui s3, 209715 +; RV32IM-NEXT: lui a6, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli t6, a1, 24 +; RV32IM-NEXT: slli a4, a1, 24 +; RV32IM-NEXT: li t3, 1 
+; RV32IM-NEXT: lui s11, 2 +; RV32IM-NEXT: lui t2, 4 +; RV32IM-NEXT: lui s10, 8 +; RV32IM-NEXT: lui t5, 32 +; RV32IM-NEXT: lui s0, 64 +; RV32IM-NEXT: lui s2, 128 +; RV32IM-NEXT: lui s4, 256 +; RV32IM-NEXT: lui s5, 512 +; RV32IM-NEXT: lui s6, 1024 +; RV32IM-NEXT: lui s7, 2048 +; RV32IM-NEXT: lui s8, 4096 +; RV32IM-NEXT: lui s9, 8192 +; RV32IM-NEXT: lui ra, 16384 +; RV32IM-NEXT: addi a3, a3, -256 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: and t0, t0, a3 +; RV32IM-NEXT: or t1, t0, t1 +; RV32IM-NEXT: lui a7, 32768 +; RV32IM-NEXT: and t4, t4, a3 +; RV32IM-NEXT: or t6, t4, t6 +; RV32IM-NEXT: lui t0, 65536 +; RV32IM-NEXT: and a0, a0, a3 +; RV32IM-NEXT: mv t4, a3 +; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a2, a2, a0 +; RV32IM-NEXT: lui a3, 131072 +; RV32IM-NEXT: and a1, a1, t4 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, a4, a1 +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: addi s1, s1, -241 +; RV32IM-NEXT: addi s3, s3, 819 +; RV32IM-NEXT: or a2, a2, t1 +; RV32IM-NEXT: addi a4, a6, 1365 +; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: or a0, a0, t6 +; RV32IM-NEXT: srli a6, a2, 4 +; RV32IM-NEXT: and a2, a2, s1 +; RV32IM-NEXT: and a6, a6, s1 +; RV32IM-NEXT: slli a2, a2, 4 +; RV32IM-NEXT: or a2, a6, a2 +; RV32IM-NEXT: srli a6, a0, 4 +; RV32IM-NEXT: and a0, a0, s1 +; RV32IM-NEXT: and a6, a6, s1 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a6, a0 +; RV32IM-NEXT: srli a6, a2, 2 +; RV32IM-NEXT: and a2, a2, s3 +; RV32IM-NEXT: and a6, a6, s3 +; RV32IM-NEXT: slli a2, a2, 2 +; RV32IM-NEXT: or a2, a6, a2 +; RV32IM-NEXT: srli a6, a0, 2 +; RV32IM-NEXT: and a0, a0, s3 +; RV32IM-NEXT: and a6, a6, s3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a6, a0 +; RV32IM-NEXT: srli a6, a2, 1 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: and a6, a6, a4 +; RV32IM-NEXT: slli a2, a2, 1 +; RV32IM-NEXT: or a6, a6, a2 +; RV32IM-NEXT: srli a2, a0, 1 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: slli t3, t3, 11 +; RV32IM-NEXT: and t3, a0, t3 +; RV32IM-NEXT: lui a4, 1 +; RV32IM-NEXT: and t4, a0, a4 +; RV32IM-NEXT: and s11, a0, s11 +; RV32IM-NEXT: and a4, a0, t2 +; RV32IM-NEXT: sw a4, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s10 +; RV32IM-NEXT: sw a4, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, t5 +; RV32IM-NEXT: sw a4, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: and a4, a0, s2 +; RV32IM-NEXT: sw a4, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s4, a0, s4 +; RV32IM-NEXT: and a4, a0, s5 +; RV32IM-NEXT: sw a4, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s6 +; RV32IM-NEXT: sw a4, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s7 +; RV32IM-NEXT: sw a4, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s8 +; RV32IM-NEXT: sw a4, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s9 +; RV32IM-NEXT: sw a4, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, ra +; RV32IM-NEXT: sw a4, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, a7 +; RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, t0 +; RV32IM-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a3 +; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded 
Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi ra, a0, 2 +; RV32IM-NEXT: andi a1, a0, 1 +; RV32IM-NEXT: andi a2, a0, 4 +; RV32IM-NEXT: andi a3, a0, 8 +; RV32IM-NEXT: andi a4, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a7, a0, 64 +; RV32IM-NEXT: andi t0, a0, 128 +; RV32IM-NEXT: andi t1, a0, 256 +; RV32IM-NEXT: andi t2, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul ra, a6, ra +; RV32IM-NEXT: mul s10, a6, a1 +; RV32IM-NEXT: mul s9, a6, a2 +; RV32IM-NEXT: mul s5, a6, a3 +; RV32IM-NEXT: mul s6, a6, a4 +; RV32IM-NEXT: mul s2, a6, a5 +; RV32IM-NEXT: mul a1, a6, a7 +; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a6, t0 +; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t6, a6, t1 +; RV32IM-NEXT: mul t2, a6, t2 +; RV32IM-NEXT: mul s7, a6, a0 +; RV32IM-NEXT: mul a0, a6, t3 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a6, t4 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t1, a6, s11 +; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a6, a0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t5, a6, a0 +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s8, a6, a0 +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a6, a0 +; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a6, s0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a6, a0 +; RV32IM-NEXT: mul a2, a6, s4 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a6, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t3, a6, a0 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s4, a6, a0 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a6, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a6, a0 +; RV32IM-NEXT: lw a4, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a6, a4 +; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a6, t0 +; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a6, t4 +; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a6, s0 +; RV32IM-NEXT: lw s11, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, a6, s11 +; RV32IM-NEXT: sw s11, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a6, a6, s11 +; RV32IM-NEXT: xor s10, s10, ra +; RV32IM-NEXT: xor s5, s9, s5 +; RV32IM-NEXT: xor s2, s6, s2 +; RV32IM-NEXT: xor t2, t6, t2 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, s10, s5 +; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s2, a3 +; RV32IM-NEXT: xor t1, t2, s7 +; RV32IM-NEXT: xor a7, a7, t5 +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, t1, a3 +; RV32IM-NEXT: xor a4, a7, s8 +; RV32IM-NEXT: xor a2, a2, t3 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a5, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: lw a5, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: lw a5, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: 
xor a4, a4, a5 +; RV32IM-NEXT: xor a2, a2, s4 +; RV32IM-NEXT: xor a0, a0, t4 +; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a0, a0, s0 +; RV32IM-NEXT: lui a5, 349525 +; RV32IM-NEXT: addi a5, a5, 1364 +; RV32IM-NEXT: xor a3, a1, a3 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a3, a3, a4 +; RV32IM-NEXT: lw a4, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a3, a2, a6 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, a6 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, s1 +; RV32IM-NEXT: and a1, a1, s1 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, s3 +; RV32IM-NEXT: and a1, a1, s3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a5 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulh_i4_bitreverse: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -496 +; RV64IM-NEXT: sd ra, 488(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 480(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a5, a0, 8 +; RV64IM-NEXT: li s4, 255 +; RV64IM-NEXT: srli ra, a0, 40 +; RV64IM-NEXT: lui s11, 16 +; RV64IM-NEXT: srli t0, a0, 56 +; RV64IM-NEXT: srliw t2, a0, 24 +; RV64IM-NEXT: slli a6, a0, 56 +; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: lui t4, 209715 +; RV64IM-NEXT: lui s8, 349525 +; RV64IM-NEXT: srli s3, a1, 24 +; RV64IM-NEXT: srli t6, a1, 8 +; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srli t5, a1, 56 +; RV64IM-NEXT: srliw s7, a1, 24 +; RV64IM-NEXT: slli a4, a1, 56 +; RV64IM-NEXT: li t1, 1 +; RV64IM-NEXT: lui s1, 256 +; RV64IM-NEXT: lui s2, 4096 +; RV64IM-NEXT: lui s0, 
8192 +; RV64IM-NEXT: lui s9, 4080 +; RV64IM-NEXT: and a2, a3, s9 +; RV64IM-NEXT: slli s5, s4, 24 +; RV64IM-NEXT: addi s10, s11, -256 +; RV64IM-NEXT: and a3, a5, s5 +; RV64IM-NEXT: sd s5, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a3, a0, s9 +; RV64IM-NEXT: slli t2, t2, 32 +; RV64IM-NEXT: addi s4, t3, -241 +; RV64IM-NEXT: addi s6, t4, 819 +; RV64IM-NEXT: addi s8, s8, 1365 +; RV64IM-NEXT: and a5, s3, s9 +; RV64IM-NEXT: and t3, ra, s10 +; RV64IM-NEXT: or t0, t3, t0 +; RV64IM-NEXT: and t3, a1, s9 +; RV64IM-NEXT: slli t4, s7, 32 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: or s3, a3, t2 +; RV64IM-NEXT: slli a3, s4, 32 +; RV64IM-NEXT: add s4, s4, a3 +; RV64IM-NEXT: slli a3, s6, 32 +; RV64IM-NEXT: add s6, s6, a3 +; RV64IM-NEXT: slli a3, s8, 32 +; RV64IM-NEXT: add s8, s8, a3 +; RV64IM-NEXT: slli s7, t1, 11 +; RV64IM-NEXT: and a3, t6, s5 +; RV64IM-NEXT: or a3, a3, a5 +; RV64IM-NEXT: slli t2, t1, 32 +; RV64IM-NEXT: and a5, a7, s10 +; RV64IM-NEXT: or a5, a5, t5 +; RV64IM-NEXT: slli ra, t1, 33 +; RV64IM-NEXT: slli t3, t3, 24 +; RV64IM-NEXT: or a7, t3, t4 +; RV64IM-NEXT: slli t3, t1, 34 +; RV64IM-NEXT: sd t3, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a2, t0 +; RV64IM-NEXT: slli t0, t1, 35 +; RV64IM-NEXT: sd t0, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, a6, a0 +; RV64IM-NEXT: slli a6, t1, 36 +; RV64IM-NEXT: or a3, a3, a5 +; RV64IM-NEXT: slli a5, t1, 37 +; RV64IM-NEXT: sd a5, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, a1, s10 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a4, a1 +; RV64IM-NEXT: or a0, a0, s3 +; RV64IM-NEXT: or a1, a1, a7 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a3 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s4, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s6, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: sd s8, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s8 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, s8 +; RV64IM-NEXT: and a2, a2, s8 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, s8 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s5, a3, a1 +; RV64IM-NEXT: andi a1, s5, 2 +; RV64IM-NEXT: andi a2, s5, 1 +; RV64IM-NEXT: andi a3, s5, 4 +; RV64IM-NEXT: andi a4, s5, 8 +; RV64IM-NEXT: andi a5, s5, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s5, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a4 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s5, 256 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill +; 
RV64IM-NEXT: andi a1, s5, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s3, t1, 38 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 40 +; RV64IM-NEXT: lui a2, 128 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: and a3, s5, s1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 41 +; RV64IM-NEXT: and a3, s5, s2 +; RV64IM-NEXT: and a4, s5, s0 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t1, 48 +; RV64IM-NEXT: and a4, s5, t2 +; RV64IM-NEXT: and a5, s5, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t1, 49 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 56 +; RV64IM-NEXT: and a2, s5, a3 +; RV64IM-NEXT: and a3, s5, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 57 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 39 +; RV64IM-NEXT: slli ra, t1, 42 +; RV64IM-NEXT: slli a4, t1, 43 +; RV64IM-NEXT: slli a5, t1, 44 +; RV64IM-NEXT: slli s0, t1, 45 +; RV64IM-NEXT: slli s1, t1, 46 +; RV64IM-NEXT: slli s2, t1, 47 +; RV64IM-NEXT: slli s4, t1, 50 +; RV64IM-NEXT: slli s6, t1, 51 +; RV64IM-NEXT: slli a1, t1, 52 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 53 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 54 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 55 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 58 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 59 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 60 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 61 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, t1, 62 +; RV64IM-NEXT: sd t1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, s5, s7 +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s5, a3 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s5, a3 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s11 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 32 +; RV64IM-NEXT: and a1, s5, s8 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s5, s11 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; 
RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s5, s10 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 1024 +; RV64IM-NEXT: and a1, s5, s9 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t0, 2048 +; RV64IM-NEXT: and a1, s5, t0 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t2, 16384 +; RV64IM-NEXT: and a1, s5, t2 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t3, 32768 +; RV64IM-NEXT: and t3, s5, t3 +; RV64IM-NEXT: lui t4, 65536 +; RV64IM-NEXT: and a1, s5, t4 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t5, 131072 +; RV64IM-NEXT: and a7, s5, t5 +; RV64IM-NEXT: lui t6, 262144 +; RV64IM-NEXT: and t6, s5, t6 +; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, s5, a1 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a6 +; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s3 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a2 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s5, ra +; RV64IM-NEXT: and a1, s5, a4 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a5 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s0 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s1 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s2 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s4 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, s5, s6 +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, s5, a1 +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, s5, a1 +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, s5, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s5, a1 +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, s5, a1 +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s5, a1 +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s5, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s5, a1 +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s5, a1 +; RV64IM-NEXT: andi a1, s5, 64 +; RV64IM-NEXT: andi a2, s5, 128 +; RV64IM-NEXT: andi a3, s5, 1024 +; RV64IM-NEXT: srliw a4, s5, 31 +; RV64IM-NEXT: srli s5, s5, 63 +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t2, a0, a3 +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 192(sp) 
# 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a5, a0, a1 +; RV64IM-NEXT: mul t3, a0, t3 +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a7 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t6 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a2, a0, s11 +; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul ra, a0, ra +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, a0, a1 +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul s0, a0, s0 +; RV64IM-NEXT: mul s1, a0, s1 +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul s3, a0, s3 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli s5, s5, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, s5 +; RV64IM-NEXT: sd a0, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld s5, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, s5, a0 +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, a0, t2 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, a0, t0 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a0, a6 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a0, a5 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor ra, a0, ra +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, a0, s6 +; RV64IM-NEXT: 
xor t4, s5, t4 +; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, a0 +; RV64IM-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, a0 +; RV64IM-NEXT: xor a6, a6, t5 +; RV64IM-NEXT: xor a5, a5, t3 +; RV64IM-NEXT: xor a2, a2, a7 +; RV64IM-NEXT: xor a3, ra, a3 +; RV64IM-NEXT: xor a1, a1, s0 +; RV64IM-NEXT: xor a7, s6, s7 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t4, a0 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: xor a1, a1, s1 +; RV64IM-NEXT: xor a7, a7, s8 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a3, a3, t6 +; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: xor a7, a7, s9 +; RV64IM-NEXT: xor t1, t3, t2 +; RV64IM-NEXT: xor t0, t1, t0 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a1, a1, s3 +; RV64IM-NEXT: xor a7, a7, s10 +; RV64IM-NEXT: xor a6, t0, a6 +; RV64IM-NEXT: xor a4, a5, a4 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a5, %hi(.LCPI15_0) +; RV64IM-NEXT: ld a5, %lo(.LCPI15_0)(a5) +; RV64IM-NEXT: slli t3, t3, 56 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a7, a0 +; RV64IM-NEXT: ld t1, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a6, t1 +; RV64IM-NEXT: xor a4, a6, a4 +; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: xor a2, a4, a2 +; RV64IM-NEXT: or a4, t3, a7 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a6, a2, t0 +; RV64IM-NEXT: xor a3, a2, a3 +; RV64IM-NEXT: srli a2, a2, 8 +; RV64IM-NEXT: slli a6, a6, 24 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: ld a7, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a2, a2, a7 +; RV64IM-NEXT: srli a3, a3, 24 +; RV64IM-NEXT: srliw a7, a1, 24 +; RV64IM-NEXT: and a3, a3, t0 +; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a7, a7, 32 +; RV64IM-NEXT: or a2, a2, a3 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a3, a6, a7 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a4, a3 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 
+; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a0, a0, 1 +; RV64IM-NEXT: ld ra, 488(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 480(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 496 +; RV64IM-NEXT: ret + %a.rev = call i4 @llvm.bitreverse.i4(i4 %a) + %b.rev = call i4 @llvm.bitreverse.i4(i4 %b) + %clmul = call i4 @llvm.clmul.i4(i4 %a.rev, i4 %b.rev) + %clmul.rev = call i4 @llvm.bitreverse.i4(i4 %clmul) + %res = lshr i4 %clmul.rev, 1 + ret i4 %res +} + + +define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { +; RV32IM-LABEL: clmulh_i8: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui a3, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui t2, 61681 +; RV32IM-NEXT: lui t3, 209715 +; RV32IM-NEXT: lui a7, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli t5, a1, 24 +; RV32IM-NEXT: slli a4, a1, 24 +; RV32IM-NEXT: li t6, 1 +; RV32IM-NEXT: lui s0, 4 +; RV32IM-NEXT: lui s1, 8 +; RV32IM-NEXT: lui s2, 32 +; RV32IM-NEXT: lui s3, 64 +; RV32IM-NEXT: lui s5, 128 +; RV32IM-NEXT: lui s6, 256 +; RV32IM-NEXT: lui s7, 512 +; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: lui s11, 8192 +; RV32IM-NEXT: lui ra, 16384 +; RV32IM-NEXT: addi s4, a3, -256 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: and t0, t0, s4 +; RV32IM-NEXT: or a3, t0, t1 +; RV32IM-NEXT: lui t0, 32768 +; RV32IM-NEXT: and t1, t4, s4 +; RV32IM-NEXT: or t4, t1, t5 +; RV32IM-NEXT: lui a6, 65536 +; RV32IM-NEXT: and a0, a0, s4 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or t5, a2, a0 +; RV32IM-NEXT: lui a2, 131072 +; RV32IM-NEXT: and a1, a1, s4 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, a4, a1 +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: addi t2, t2, -241 +; RV32IM-NEXT: addi t3, t3, 819 +; RV32IM-NEXT: addi a7, a7, 1365 +; RV32IM-NEXT: or a3, t5, a3 +; RV32IM-NEXT: or a0, a0, t4 +; RV32IM-NEXT: srli t4, a3, 4 +; RV32IM-NEXT: and a3, a3, t2 +; RV32IM-NEXT: srli t5, a0, 4 +; RV32IM-NEXT: and a0, a0, t2 +; RV32IM-NEXT: and t4, t4, t2 +; RV32IM-NEXT: slli a3, a3, 4 +; RV32IM-NEXT: and t5, t5, t2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a3, t4, a3 +; 
RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t4, a3, 2 +; RV32IM-NEXT: and a3, a3, t3 +; RV32IM-NEXT: srli t5, a0, 2 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a3, a3, 2 +; RV32IM-NEXT: and t5, t5, t3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t4, a3, 1 +; RV32IM-NEXT: and a3, a3, a7 +; RV32IM-NEXT: srli t5, a0, 1 +; RV32IM-NEXT: and a0, a0, a7 +; RV32IM-NEXT: and t4, t4, a7 +; RV32IM-NEXT: and a7, t5, a7 +; RV32IM-NEXT: lui a4, 524288 +; RV32IM-NEXT: slli t6, t6, 11 +; RV32IM-NEXT: slli a3, a3, 1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a3, t4, a3 +; RV32IM-NEXT: or a0, a7, a0 +; RV32IM-NEXT: andi t5, a0, 2 +; RV32IM-NEXT: andi t4, a0, 1 +; RV32IM-NEXT: and t6, a0, t6 +; RV32IM-NEXT: lui a7, 1 +; RV32IM-NEXT: and a7, a0, a7 +; RV32IM-NEXT: sw a7, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui a7, 2 +; RV32IM-NEXT: and a7, a0, a7 +; RV32IM-NEXT: sw a7, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: sw s0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s2, a0, s2 +; RV32IM-NEXT: and a5, a0, s3 +; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s5 +; RV32IM-NEXT: sw a5, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s6 +; RV32IM-NEXT: sw a5, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s7, a0, s7 +; RV32IM-NEXT: and s8, a0, s8 +; RV32IM-NEXT: and a5, a0, s9 +; RV32IM-NEXT: sw a5, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s10 +; RV32IM-NEXT: sw a5, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, s11 +; RV32IM-NEXT: sw a5, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, ra +; RV32IM-NEXT: sw a5, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, t0 +; RV32IM-NEXT: sw a5, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a6 +; RV32IM-NEXT: sw a5, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, a4 +; RV32IM-NEXT: sw a4, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a0, 4 +; RV32IM-NEXT: andi a2, a0, 8 +; RV32IM-NEXT: andi a4, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a6, a0, 64 +; RV32IM-NEXT: andi a7, a0, 128 +; RV32IM-NEXT: andi t0, a0, 256 +; RV32IM-NEXT: andi t1, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul t5, a3, t5 +; RV32IM-NEXT: sw t5, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s10, a3, t4 +; RV32IM-NEXT: mul a1, a3, a1 +; RV32IM-NEXT: sw a1, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s6, a3, a2 +; RV32IM-NEXT: mul s5, a3, a4 +; RV32IM-NEXT: mul s3, a3, a5 +; RV32IM-NEXT: mul a1, a3, a6 +; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a3, a7 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s0, a3, t0 +; RV32IM-NEXT: mul t5, a3, t1 +; RV32IM-NEXT: mul s11, a3, a0 +; RV32IM-NEXT: mul a0, a3, t6 +; RV32IM-NEXT: sw a0, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a3, a0 +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a3, a0 +; RV32IM-NEXT: mul 
s1, a3, s1 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, a3, a0 +; RV32IM-NEXT: mul a0, a3, s2 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a3, a0 +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a3, a0 +; RV32IM-NEXT: mul a6, a3, s7 +; RV32IM-NEXT: mul t4, a3, s8 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s7, a3, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a3, a0 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a3, a0 +; RV32IM-NEXT: lw a5, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a3, a5 +; RV32IM-NEXT: lw t0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a3, t0 +; RV32IM-NEXT: lw t6, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a3, t6 +; RV32IM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s2, a3, s2 +; RV32IM-NEXT: lw s8, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s8, a3, s8 +; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a3, s9 +; RV32IM-NEXT: lw s9, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s10, s10, s9 +; RV32IM-NEXT: lw s9, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor s6, s9, s6 +; RV32IM-NEXT: xor s3, s5, s3 +; RV32IM-NEXT: xor t5, s0, t5 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, s10, s6 +; RV32IM-NEXT: lw a4, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, s3, a4 +; RV32IM-NEXT: xor t1, t5, s11 +; RV32IM-NEXT: xor a7, a7, s1 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, a1, a4 +; RV32IM-NEXT: lw a4, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, t1, a4 +; RV32IM-NEXT: xor a5, a7, ra +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a6 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s7 +; RV32IM-NEXT: xor a0, a0, t6 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s2 +; RV32IM-NEXT: xor a4, a1, a4 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a0, a0, s8 +; RV32IM-NEXT: xor a2, a4, a2 +; RV32IM-NEXT: xor a0, a0, a3 +; RV32IM-NEXT: and a3, a2, s4 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, s4 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, t2 +; RV32IM-NEXT: and a1, a1, t2 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and a1, a1, t3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: andi a1, a0, 85 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: slli a1, a1, 1 +; RV32IM-NEXT: andi a0, a0, 340 +; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 
4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulh_i8: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -480 +; RV64IM-NEXT: sd ra, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a7, a0, 8 +; RV64IM-NEXT: li s4, 255 +; RV64IM-NEXT: srli a4, a0, 40 +; RV64IM-NEXT: lui s10, 16 +; RV64IM-NEXT: srli t1, a0, 56 +; RV64IM-NEXT: srliw t4, a0, 24 +; RV64IM-NEXT: slli a5, a0, 56 +; RV64IM-NEXT: lui s3, 61681 +; RV64IM-NEXT: lui t5, 209715 +; RV64IM-NEXT: lui s6, 349525 +; RV64IM-NEXT: srli s9, a1, 24 +; RV64IM-NEXT: srli s0, a1, 8 +; RV64IM-NEXT: srli ra, a1, 40 +; RV64IM-NEXT: srli t2, a1, 56 +; RV64IM-NEXT: srliw s11, a1, 24 +; RV64IM-NEXT: slli a6, a1, 56 +; RV64IM-NEXT: li t0, 1 +; RV64IM-NEXT: lui s1, 128 +; RV64IM-NEXT: lui s2, 256 +; RV64IM-NEXT: lui t6, 4096 +; RV64IM-NEXT: lui s5, 8192 +; RV64IM-NEXT: lui s7, 4080 +; RV64IM-NEXT: and a2, a3, s7 +; RV64IM-NEXT: slli t3, s4, 24 +; RV64IM-NEXT: addi s8, s10, -256 +; RV64IM-NEXT: and a3, a7, t3 +; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a3, a0, s7 +; RV64IM-NEXT: slli t4, t4, 32 +; RV64IM-NEXT: addi s3, s3, -241 +; RV64IM-NEXT: addi s4, t5, 819 +; RV64IM-NEXT: addi s6, s6, 1365 +; RV64IM-NEXT: and a7, s9, s7 +; RV64IM-NEXT: and a4, a4, s8 +; RV64IM-NEXT: or a4, a4, t1 +; RV64IM-NEXT: and t1, a1, s7 +; RV64IM-NEXT: slli t5, s11, 32 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: or s9, a3, t4 +; RV64IM-NEXT: slli a3, s3, 32 +; RV64IM-NEXT: add s3, s3, a3 +; RV64IM-NEXT: slli a3, s4, 32 +; RV64IM-NEXT: add s4, s4, a3 +; RV64IM-NEXT: slli a3, s6, 32 +; RV64IM-NEXT: add s6, s6, a3 +; RV64IM-NEXT: slli t4, t0, 11 +; RV64IM-NEXT: and a3, s0, t3 +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli s11, t0, 32 +; RV64IM-NEXT: and a7, ra, s8 +; RV64IM-NEXT: or a7, a7, t2 +; RV64IM-NEXT: slli ra, t0, 33 +; RV64IM-NEXT: slli t1, t1, 24 +; RV64IM-NEXT: or t1, t1, t5 +; RV64IM-NEXT: slli s0, t0, 34 +; RV64IM-NEXT: or a2, a2, a4 +; RV64IM-NEXT: slli a4, t0, 35 +; RV64IM-NEXT: sd a4, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s8 +; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, a5, a0 +; RV64IM-NEXT: slli a4, t0, 36 +; RV64IM-NEXT: sd 
a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli a7, t0, 37 +; RV64IM-NEXT: and a1, a1, s8 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a6, a1 +; RV64IM-NEXT: slli a6, t0, 38 +; RV64IM-NEXT: or a0, a0, s9 +; RV64IM-NEXT: or a1, a1, t1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a3 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s3 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s3 +; RV64IM-NEXT: and a2, a2, s3 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s3 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s4, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s6, a3, a1 +; RV64IM-NEXT: andi a1, s6, 2 +; RV64IM-NEXT: andi a2, s6, 1 +; RV64IM-NEXT: andi a3, s6, 4 +; RV64IM-NEXT: andi a4, s6, 8 +; RV64IM-NEXT: andi a5, s6, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a4 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s6, 256 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s3, t0, 39 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 40 +; RV64IM-NEXT: and a2, s6, s1 +; RV64IM-NEXT: and a3, s6, s2 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 41 +; RV64IM-NEXT: and a3, s6, t6 +; RV64IM-NEXT: and a4, s6, s5 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t0, 48 +; RV64IM-NEXT: and a4, s6, s11 +; RV64IM-NEXT: and a5, s6, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t0, 49 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 56 +; RV64IM-NEXT: and a2, s6, a3 +; RV64IM-NEXT: and a3, s6, 
a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 57 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 42 +; RV64IM-NEXT: slli ra, t0, 43 +; RV64IM-NEXT: slli a4, t0, 44 +; RV64IM-NEXT: slli t6, t0, 45 +; RV64IM-NEXT: slli s1, t0, 46 +; RV64IM-NEXT: slli s2, t0, 47 +; RV64IM-NEXT: slli s4, t0, 50 +; RV64IM-NEXT: slli s5, t0, 51 +; RV64IM-NEXT: slli a1, t0, 52 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 53 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 54 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 55 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 58 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 59 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 60 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 61 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t0, t0, 62 +; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t4 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 32 +; RV64IM-NEXT: and a1, s6, s9 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s6, s11 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 1024 +; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 2048 +; RV64IM-NEXT: and a1, s6, s7 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t1, 16384 +; RV64IM-NEXT: and a1, s6, t1 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t2, 32768 +; RV64IM-NEXT: and t2, s6, t2 +; RV64IM-NEXT: lui t3, 65536 +; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t4, 131072 +; RV64IM-NEXT: and a5, s6, t4 +; RV64IM-NEXT: lui t5, 262144 +; RV64IM-NEXT: and t0, s6, t5 +; RV64IM-NEXT: and s11, s6, s0 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a6 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a2 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s6, ra +; RV64IM-NEXT: and a1, s6, a4 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, 
t6 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s2 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s4 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, s6, s5 +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, s6, a1 +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, s6, a1 +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, s6, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s6, a1 +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, s6, a1 +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s6, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s6, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s6, a1 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s6, a1 +; RV64IM-NEXT: andi a1, s6, 64 +; RV64IM-NEXT: andi a2, s6, 128 +; RV64IM-NEXT: andi a3, s6, 1024 +; RV64IM-NEXT: srliw a4, s6, 31 +; RV64IM-NEXT: srli s6, s6, 63 +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t3, a0, a3 +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: mul t5, a0, t2 +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a5 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t0 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a3, a0, s11 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 
+; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: mul a5, a0, ra +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a1 +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul s0, a0, s0 +; RV64IM-NEXT: mul s1, a0, s1 +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul s3, a0, s3 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli s6, s6, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, s6 +; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, a0 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, a0, t3 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, a0, t1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a0, a7 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a0, a6 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a0, a3 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, a0, s5 +; RV64IM-NEXT: xor t4, s6, t4 +; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: xor a7, a7, t6 +; RV64IM-NEXT: xor a6, a6, t5 +; RV64IM-NEXT: xor a3, a3, t0 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a1, a1, s0 +; RV64IM-NEXT: xor a5, s5, s7 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, t2 +; RV64IM-NEXT: xor a1, a1, s1 +; RV64IM-NEXT: xor a5, a5, s8 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: xor a5, a5, s9 +; RV64IM-NEXT: xor t2, t0, t3 +; RV64IM-NEXT: xor t1, t2, t1 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; 
RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, ra +; RV64IM-NEXT: xor a1, a1, s3 +; RV64IM-NEXT: xor a5, a5, s10 +; RV64IM-NEXT: xor a7, t1, a7 +; RV64IM-NEXT: xor a4, a6, a4 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a6, %hi(.LCPI16_0) +; RV64IM-NEXT: ld a6, %lo(.LCPI16_0)(a6) +; RV64IM-NEXT: slli t0, t0, 56 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a5, a0 +; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a5, a7, t1 +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: slli a5, a5, 40 +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: or a4, t0, a5 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a5, a3, t0 +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: srli a3, a3, 8 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a7 +; RV64IM-NEXT: srli a2, a2, 24 +; RV64IM-NEXT: srliw a7, a1, 24 +; RV64IM-NEXT: and a2, a2, t0 +; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a7, a7, 32 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a3, a5, a7 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a4, a3 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: andi a0, a0, 85 +; RV64IM-NEXT: and a1, a1, a6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 55 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 480 +; RV64IM-NEXT: ret + %a.ext = zext i8 %a to i16 + %b.ext = zext i8 %b to i16 + %clmul = call i16 @llvm.clmul.i16(i16 %a.ext, i16 %b.ext) + %res.ext = lshr i16 %clmul, 8 + %res = trunc i16 %res.ext to i8 + ret i8 %res +} + +define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { +; RV32IM-LABEL: clmulh_i16: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; 
RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui ra, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui s10, 61681 +; RV32IM-NEXT: lui t2, 209715 +; RV32IM-NEXT: lui a4, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli t5, a1, 24 +; RV32IM-NEXT: slli a5, a1, 24 +; RV32IM-NEXT: li t6, 1 +; RV32IM-NEXT: lui a7, 2 +; RV32IM-NEXT: lui a6, 4 +; RV32IM-NEXT: lui s2, 8 +; RV32IM-NEXT: lui s0, 32 +; RV32IM-NEXT: lui s1, 64 +; RV32IM-NEXT: lui t3, 128 +; RV32IM-NEXT: lui s3, 256 +; RV32IM-NEXT: lui s4, 512 +; RV32IM-NEXT: lui s6, 1024 +; RV32IM-NEXT: lui s7, 2048 +; RV32IM-NEXT: lui s8, 4096 +; RV32IM-NEXT: lui s9, 8192 +; RV32IM-NEXT: lui s11, 16384 +; RV32IM-NEXT: addi s5, ra, -256 +; RV32IM-NEXT: sw s5, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t0, t0, s5 +; RV32IM-NEXT: or t1, t0, t1 +; RV32IM-NEXT: lui a3, 32768 +; RV32IM-NEXT: and t4, t4, s5 +; RV32IM-NEXT: or t5, t4, t5 +; RV32IM-NEXT: lui t0, 65536 +; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a2, a2, a0 +; RV32IM-NEXT: lui t4, 131072 +; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, a5, a1 +; RV32IM-NEXT: lui a5, 262144 +; RV32IM-NEXT: addi s5, s10, -241 +; RV32IM-NEXT: addi s10, t2, 819 +; RV32IM-NEXT: addi a4, a4, 1365 +; RV32IM-NEXT: or a2, a2, t1 +; RV32IM-NEXT: or a0, a0, t5 +; RV32IM-NEXT: srli t1, a2, 4 +; RV32IM-NEXT: and a2, a2, s5 +; RV32IM-NEXT: srli t5, a0, 4 +; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: and t1, t1, s5 +; RV32IM-NEXT: slli a2, a2, 4 +; RV32IM-NEXT: and t5, t5, s5 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a2, t1, a2 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t1, a2, 2 +; RV32IM-NEXT: and a2, a2, s10 +; RV32IM-NEXT: srli t5, a0, 2 +; RV32IM-NEXT: and a0, a0, s10 +; RV32IM-NEXT: and t1, t1, s10 +; RV32IM-NEXT: slli a2, a2, 2 +; RV32IM-NEXT: and t5, t5, s10 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a2, t1, a2 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: srli t1, a2, 1 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: srli t5, a0, 1 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and t1, t1, a4 +; RV32IM-NEXT: and t5, t5, a4 +; RV32IM-NEXT: lui a1, 524288 +; RV32IM-NEXT: slli t6, t6, 11 +; RV32IM-NEXT: slli a2, a2, 1 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a4, t1, a2 +; RV32IM-NEXT: or a0, t5, a0 +; RV32IM-NEXT: andi t2, a0, 2 +; RV32IM-NEXT: andi t5, a0, 1 +; RV32IM-NEXT: and t6, a0, t6 +; RV32IM-NEXT: lui a2, 1 +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a7 +; RV32IM-NEXT: sw a2, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a6 +; RV32IM-NEXT: sw a2, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s2 +; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and ra, a0, ra +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: sw s1, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, t3 +; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s3 +; RV32IM-NEXT: sw a2, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s4, a0, s4 +; RV32IM-NEXT: and a2, a0, 
s6 +; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s7 +; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s8 +; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s9 +; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s11 +; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a3 +; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, t0 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, t4 +; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi a1, a0, 4 +; RV32IM-NEXT: andi a2, a0, 8 +; RV32IM-NEXT: andi a3, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a6, a0, 64 +; RV32IM-NEXT: andi a7, a0, 128 +; RV32IM-NEXT: andi t0, a0, 256 +; RV32IM-NEXT: andi t1, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul s11, a4, t2 +; RV32IM-NEXT: mul s7, a4, t5 +; RV32IM-NEXT: mul s8, a4, a1 +; RV32IM-NEXT: mul s3, a4, a2 +; RV32IM-NEXT: mul s2, a4, a3 +; RV32IM-NEXT: mul s1, a4, a5 +; RV32IM-NEXT: mul a1, a4, a6 +; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a4, a7 +; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t5, a4, t0 +; RV32IM-NEXT: mul t3, a4, t1 +; RV32IM-NEXT: mul s9, a4, a0 +; RV32IM-NEXT: mul a0, a4, t6 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t1, a4, a0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a4, a0 +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t6, a4, a0 +; RV32IM-NEXT: mul s6, a4, ra +; RV32IM-NEXT: mul a0, a4, s0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a4, a0 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a4, a0 +; RV32IM-NEXT: mul a6, a4, s4 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t2, a4, a0 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s4, a4, a0 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a4, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: lw a5, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a4, a5 +; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a4, t0 +; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a4, t4 +; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a4, s0 +; RV32IM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul ra, a4, ra +; RV32IM-NEXT: sw ra, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a4, ra +; RV32IM-NEXT: xor s7, s7, s11 +; RV32IM-NEXT: xor s3, s8, s3 +; RV32IM-NEXT: xor s1, s2, s1 +; RV32IM-NEXT: xor t3, t5, t3 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: 
xor a0, a1, a0 +; RV32IM-NEXT: xor a1, s7, s3 +; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s1, a3 +; RV32IM-NEXT: xor t1, t3, s9 +; RV32IM-NEXT: xor a7, a7, t6 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, t1, a3 +; RV32IM-NEXT: xor a5, a7, s6 +; RV32IM-NEXT: xor a2, a2, t2 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a6 +; RV32IM-NEXT: lw a6, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s4 +; RV32IM-NEXT: xor a0, a0, t4 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s0 +; RV32IM-NEXT: xor a3, a1, a3 +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: lui a5, 21 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lui a6, 5 +; RV32IM-NEXT: addi a5, a5, 1364 +; RV32IM-NEXT: addi a6, a6, 1365 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: lw a7, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a3, a2, a7 +; RV32IM-NEXT: srli a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, a7 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, s10 +; RV32IM-NEXT: and a1, a1, s10 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: and a0, a0, a6 +; RV32IM-NEXT: and a1, a1, a5 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulh_i16: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -480 +; RV64IM-NEXT: sd ra, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill 
+; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: li s4, 255 +; RV64IM-NEXT: srli a4, a0, 40 +; RV64IM-NEXT: lui s3, 16 +; RV64IM-NEXT: srli t1, a0, 56 +; RV64IM-NEXT: srliw t4, a0, 24 +; RV64IM-NEXT: slli a7, a0, 56 +; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: lui t5, 209715 +; RV64IM-NEXT: lui s6, 349525 +; RV64IM-NEXT: srli s9, a1, 24 +; RV64IM-NEXT: srli s0, a1, 8 +; RV64IM-NEXT: srli ra, a1, 40 +; RV64IM-NEXT: srli t2, a1, 56 +; RV64IM-NEXT: srliw s11, a1, 24 +; RV64IM-NEXT: slli a5, a1, 56 +; RV64IM-NEXT: li t0, 1 +; RV64IM-NEXT: lui s1, 128 +; RV64IM-NEXT: lui s2, 256 +; RV64IM-NEXT: lui t6, 4096 +; RV64IM-NEXT: lui s5, 8192 +; RV64IM-NEXT: lui s7, 4080 +; RV64IM-NEXT: and a2, a3, s7 +; RV64IM-NEXT: slli s10, s4, 24 +; RV64IM-NEXT: addi s8, s3, -256 +; RV64IM-NEXT: and a3, a6, s10 +; RV64IM-NEXT: sd s10, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a3, a0, s7 +; RV64IM-NEXT: slli t4, t4, 32 +; RV64IM-NEXT: addi s3, t3, -241 +; RV64IM-NEXT: addi s4, t5, 819 +; RV64IM-NEXT: addi s6, s6, 1365 +; RV64IM-NEXT: and a6, s9, s7 +; RV64IM-NEXT: and a4, a4, s8 +; RV64IM-NEXT: or a4, a4, t1 +; RV64IM-NEXT: and t1, a1, s7 +; RV64IM-NEXT: slli t3, s11, 32 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: or s9, a3, t4 +; RV64IM-NEXT: slli a3, s3, 32 +; RV64IM-NEXT: add s3, s3, a3 +; RV64IM-NEXT: slli a3, s4, 32 +; RV64IM-NEXT: add s4, s4, a3 +; RV64IM-NEXT: slli a3, s6, 32 +; RV64IM-NEXT: add s6, s6, a3 +; RV64IM-NEXT: slli t4, t0, 11 +; RV64IM-NEXT: and a3, s0, s10 +; RV64IM-NEXT: or a3, a3, a6 +; RV64IM-NEXT: slli s11, t0, 32 +; RV64IM-NEXT: and a6, ra, s8 +; RV64IM-NEXT: or a6, a6, t2 +; RV64IM-NEXT: slli ra, t0, 33 +; RV64IM-NEXT: slli t1, t1, 24 +; RV64IM-NEXT: or t1, t1, t3 +; RV64IM-NEXT: slli s0, t0, 34 +; RV64IM-NEXT: or a2, a2, a4 +; RV64IM-NEXT: slli a4, t0, 35 +; RV64IM-NEXT: sd a4, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s8 +; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, a7, a0 +; RV64IM-NEXT: slli a7, t0, 36 +; RV64IM-NEXT: or a3, a3, a6 +; RV64IM-NEXT: slli a6, t0, 37 +; RV64IM-NEXT: and a1, a1, s8 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a5, a1 +; RV64IM-NEXT: slli a4, t0, 38 +; RV64IM-NEXT: sd a4, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a0, a0, s9 +; RV64IM-NEXT: or a1, a1, t1 +; RV64IM-NEXT: or a0, a0, a2 +; RV64IM-NEXT: or a1, a1, a3 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s3 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s3 +; RV64IM-NEXT: and a2, a2, s3 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s3 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s4, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s6, a3, a1 +; RV64IM-NEXT: 
andi a1, s6, 2 +; RV64IM-NEXT: andi a2, s6, 1 +; RV64IM-NEXT: andi a3, s6, 4 +; RV64IM-NEXT: andi a4, s6, 8 +; RV64IM-NEXT: andi a5, s6, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a4 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s6, 256 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s6, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s3, t0, 39 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 40 +; RV64IM-NEXT: and a2, s6, s1 +; RV64IM-NEXT: and a3, s6, s2 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 41 +; RV64IM-NEXT: and a3, s6, t6 +; RV64IM-NEXT: and a4, s6, s5 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t0, 48 +; RV64IM-NEXT: and a4, s6, s11 +; RV64IM-NEXT: and a5, s6, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t0, 49 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 56 +; RV64IM-NEXT: and a2, s6, a3 +; RV64IM-NEXT: and a3, s6, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 57 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a2, s6, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t0, 42 +; RV64IM-NEXT: slli ra, t0, 43 +; RV64IM-NEXT: slli a4, t0, 44 +; RV64IM-NEXT: slli t6, t0, 45 +; RV64IM-NEXT: slli s1, t0, 46 +; RV64IM-NEXT: slli s2, t0, 47 +; RV64IM-NEXT: slli s4, t0, 50 +; RV64IM-NEXT: slli s5, t0, 51 +; RV64IM-NEXT: slli a1, t0, 52 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 53 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 54 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 55 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 58 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 59 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 60 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t0, 61 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t0, t0, 62 +; RV64IM-NEXT: sd t0, 80(sp) # 
8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t4 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s6, a3 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a1, 16 +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 32 +; RV64IM-NEXT: and a1, s6, s9 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s6, s11 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 1024 +; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s7, 2048 +; RV64IM-NEXT: and a1, s6, s7 +; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t1, 16384 +; RV64IM-NEXT: and a1, s6, t1 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t2, 32768 +; RV64IM-NEXT: and t2, s6, t2 +; RV64IM-NEXT: lui t3, 65536 +; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t4, 131072 +; RV64IM-NEXT: and a5, s6, t4 +; RV64IM-NEXT: lui t5, 262144 +; RV64IM-NEXT: and t0, s6, t5 +; RV64IM-NEXT: and s11, s6, s0 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a6 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a2 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s6, ra +; RV64IM-NEXT: and a1, s6, a4 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, t6 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s2 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, s4 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and s0, s6, s5 +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, s6, a1 +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s2, s6, a1 +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s3, s6, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s6, a1 +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s5, s6, a1 +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s6, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s6, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s6, a1 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s6, a1 +; RV64IM-NEXT: andi a1, s6, 64 +; RV64IM-NEXT: andi a2, s6, 128 +; RV64IM-NEXT: andi a3, s6, 1024 +; RV64IM-NEXT: srliw a4, s6, 31 +; RV64IM-NEXT: srli s6, s6, 63 +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte 
Folded Spill +; RV64IM-NEXT: mul t3, a0, a3 +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a7, a0, a1 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: mul t5, a0, t2 +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a5 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, t0 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a3, a0, s11 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: mul a5, a0, ra +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t2, a0, a1 +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, a1 +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul s0, a0, s0 +; RV64IM-NEXT: mul s1, a0, s1 +; RV64IM-NEXT: mul s2, a0, s2 +; RV64IM-NEXT: mul s3, a0, s3 +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s5, a0, s5 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli s6, s6, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, s6 +; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, a0 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte 
Folded Reload +; RV64IM-NEXT: xor t3, a0, t3 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, a0, t1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a0, a7 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a0, a6 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a0, a3 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, a0, s5 +; RV64IM-NEXT: xor t4, s6, t4 +; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: xor a7, a7, t6 +; RV64IM-NEXT: xor a6, a6, t5 +; RV64IM-NEXT: xor a3, a3, t0 +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: xor a1, a1, s0 +; RV64IM-NEXT: xor a5, s5, s7 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, t2 +; RV64IM-NEXT: xor a1, a1, s1 +; RV64IM-NEXT: xor a5, a5, s8 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: xor a5, a5, s9 +; RV64IM-NEXT: xor t2, t0, t3 +; RV64IM-NEXT: xor t1, t2, t1 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, ra +; RV64IM-NEXT: xor a1, a1, s3 +; RV64IM-NEXT: xor a5, a5, s10 +; RV64IM-NEXT: xor a7, t1, a7 +; RV64IM-NEXT: xor a4, a6, a4 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a6, %hi(.LCPI17_0) +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a5, a0 +; RV64IM-NEXT: lui a5, 5 +; RV64IM-NEXT: ld a6, %lo(.LCPI17_0)(a6) +; RV64IM-NEXT: addi a5, a5, 1365 +; RV64IM-NEXT: slli t0, t0, 56 +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: ld t2, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a7, t2 +; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: xor a3, a4, a3 +; RV64IM-NEXT: or a4, t0, a7 +; RV64IM-NEXT: lui t1, 4080 +; RV64IM-NEXT: and a7, a3, t1 +; RV64IM-NEXT: xor a2, a3, a2 +; RV64IM-NEXT: srli a3, a3, 8 +; RV64IM-NEXT: slli a7, a7, 24 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: ld t0, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, t0 +; RV64IM-NEXT: srli a2, a2, 24 +; RV64IM-NEXT: srliw t0, a1, 24 +; RV64IM-NEXT: and a2, a2, t1 +; RV64IM-NEXT: srli t1, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli t0, t0, 32 +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a1, t1, 
t2 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a3, a7, t0 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a4, a3 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: and a1, a1, a6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 47 +; RV64IM-NEXT: srli a0, a0, 48 +; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s3, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 480 +; RV64IM-NEXT: ret + %a.ext = zext i16 %a to i32 + %b.ext = zext i16 %b to i32 + %clmul = call i32 @llvm.clmul.i32(i32 %a.ext, i32 %b.ext) + %res.ext = lshr i32 %clmul, 16 + %res = trunc i32 %res.ext to i16 + ret i16 %res +} + +define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { +; RV32IM-LABEL: clmulh_i32: +; RV32IM: # %bb.0: +; RV32IM-NEXT: addi sp, sp, -144 +; RV32IM-NEXT: sw ra, 140(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s0, 136(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 132(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s2, 128(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s3, 124(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s4, 120(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s5, 116(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s6, 112(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s7, 108(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s8, 104(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s9, 100(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s10, 96(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s11, 92(sp) # 4-byte Folded Spill +; RV32IM-NEXT: srli t0, a0, 8 +; RV32IM-NEXT: lui a3, 16 +; RV32IM-NEXT: srli t1, a0, 24 +; RV32IM-NEXT: slli a2, a0, 24 +; RV32IM-NEXT: lui s1, 61681 +; RV32IM-NEXT: lui s3, 209715 +; RV32IM-NEXT: lui a6, 349525 +; RV32IM-NEXT: srli t4, a1, 8 +; RV32IM-NEXT: srli t6, a1, 24 +; RV32IM-NEXT: slli a4, a1, 24 +; RV32IM-NEXT: li t3, 1 +; RV32IM-NEXT: lui s11, 2 +; RV32IM-NEXT: lui t2, 4 +; RV32IM-NEXT: lui s10, 8 +; RV32IM-NEXT: lui t5, 32 +; RV32IM-NEXT: lui s0, 64 +; RV32IM-NEXT: lui s2, 128 +; RV32IM-NEXT: lui s4, 256 +; RV32IM-NEXT: lui s5, 512 +; RV32IM-NEXT: lui s6, 1024 +; RV32IM-NEXT: lui s7, 2048 +; RV32IM-NEXT: lui s8, 4096 +; RV32IM-NEXT: lui s9, 8192 +; RV32IM-NEXT: lui ra, 16384 +; RV32IM-NEXT: addi a3, a3, -256 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: and t0, t0, a3 +; RV32IM-NEXT: or t1, t0, t1 +; RV32IM-NEXT: lui a7, 32768 +; RV32IM-NEXT: and t4, t4, a3 +; RV32IM-NEXT: or t6, t4, t6 +; RV32IM-NEXT: lui t0, 65536 +; 
RV32IM-NEXT: and a0, a0, a3 +; RV32IM-NEXT: mv t4, a3 +; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: slli a0, a0, 8 +; RV32IM-NEXT: or a2, a2, a0 +; RV32IM-NEXT: lui a3, 131072 +; RV32IM-NEXT: and a1, a1, t4 +; RV32IM-NEXT: slli a1, a1, 8 +; RV32IM-NEXT: or a0, a4, a1 +; RV32IM-NEXT: lui a1, 262144 +; RV32IM-NEXT: addi s1, s1, -241 +; RV32IM-NEXT: addi s3, s3, 819 +; RV32IM-NEXT: or a2, a2, t1 +; RV32IM-NEXT: addi a4, a6, 1365 +; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: or a0, a0, t6 +; RV32IM-NEXT: srli a6, a2, 4 +; RV32IM-NEXT: and a2, a2, s1 +; RV32IM-NEXT: and a6, a6, s1 +; RV32IM-NEXT: slli a2, a2, 4 +; RV32IM-NEXT: or a2, a6, a2 +; RV32IM-NEXT: srli a6, a0, 4 +; RV32IM-NEXT: and a0, a0, s1 +; RV32IM-NEXT: and a6, a6, s1 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a6, a0 +; RV32IM-NEXT: srli a6, a2, 2 +; RV32IM-NEXT: and a2, a2, s3 +; RV32IM-NEXT: and a6, a6, s3 +; RV32IM-NEXT: slli a2, a2, 2 +; RV32IM-NEXT: or a2, a6, a2 +; RV32IM-NEXT: srli a6, a0, 2 +; RV32IM-NEXT: and a0, a0, s3 +; RV32IM-NEXT: and a6, a6, s3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a6, a0 +; RV32IM-NEXT: srli a6, a2, 1 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: and a6, a6, a4 +; RV32IM-NEXT: slli a2, a2, 1 +; RV32IM-NEXT: or a6, a6, a2 +; RV32IM-NEXT: srli a2, a0, 1 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: slli t3, t3, 11 +; RV32IM-NEXT: and t3, a0, t3 +; RV32IM-NEXT: lui a4, 1 +; RV32IM-NEXT: and t4, a0, a4 +; RV32IM-NEXT: and s11, a0, s11 +; RV32IM-NEXT: and a4, a0, t2 +; RV32IM-NEXT: sw a4, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s10 +; RV32IM-NEXT: sw a4, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, t5 +; RV32IM-NEXT: sw a4, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: and a4, a0, s2 +; RV32IM-NEXT: sw a4, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s4, a0, s4 +; RV32IM-NEXT: and a4, a0, s5 +; RV32IM-NEXT: sw a4, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s6 +; RV32IM-NEXT: sw a4, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s7 +; RV32IM-NEXT: sw a4, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s8 +; RV32IM-NEXT: sw a4, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s9 +; RV32IM-NEXT: sw a4, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, ra +; RV32IM-NEXT: sw a4, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, a7 +; RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, t0 +; RV32IM-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a3 +; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi ra, a0, 2 +; RV32IM-NEXT: andi a1, a0, 1 +; RV32IM-NEXT: andi a2, a0, 4 +; RV32IM-NEXT: andi a3, a0, 8 +; RV32IM-NEXT: andi a4, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a7, a0, 64 +; RV32IM-NEXT: andi t0, a0, 128 +; RV32IM-NEXT: andi t1, a0, 256 +; RV32IM-NEXT: andi t2, a0, 512 +; RV32IM-NEXT: andi a0, a0, 1024 +; RV32IM-NEXT: mul ra, a6, ra +; RV32IM-NEXT: mul s10, a6, a1 +; RV32IM-NEXT: mul s9, a6, a2 +; RV32IM-NEXT: mul s5, a6, a3 +; RV32IM-NEXT: mul s6, a6, a4 +; 
RV32IM-NEXT: mul s2, a6, a5 +; RV32IM-NEXT: mul a1, a6, a7 +; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a6, t0 +; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t6, a6, t1 +; RV32IM-NEXT: mul t2, a6, t2 +; RV32IM-NEXT: mul s7, a6, a0 +; RV32IM-NEXT: mul a0, a6, t3 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a6, t4 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t1, a6, s11 +; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a6, a0 +; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t5, a6, a0 +; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s8, a6, a0 +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a6, a0 +; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a6, s0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a3, a6, a0 +; RV32IM-NEXT: mul a2, a6, s4 +; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a6, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t3, a6, a0 +; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s4, a6, a0 +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a6, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a6, a0 +; RV32IM-NEXT: lw a4, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a6, a4 +; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a6, t0 +; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a6, t4 +; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a6, s0 +; RV32IM-NEXT: lw s11, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, a6, s11 +; RV32IM-NEXT: sw s11, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a6, a6, s11 +; RV32IM-NEXT: xor s10, s10, ra +; RV32IM-NEXT: xor s5, s9, s5 +; RV32IM-NEXT: xor s2, s6, s2 +; RV32IM-NEXT: xor t2, t6, t2 +; RV32IM-NEXT: xor a7, t1, a7 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a1, a0 +; RV32IM-NEXT: xor a1, s10, s5 +; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s2, a3 +; RV32IM-NEXT: xor t1, t2, s7 +; RV32IM-NEXT: xor a7, a7, t5 +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: xor a1, a1, a3 +; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, t1, a3 +; RV32IM-NEXT: xor a4, a7, s8 +; RV32IM-NEXT: xor a2, a2, t3 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a5, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: lw a5, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: lw a5, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a2, a2, s4 +; RV32IM-NEXT: xor a0, a0, t4 +; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a0, a0, s0 +; RV32IM-NEXT: lui a5, 349525 +; RV32IM-NEXT: addi a5, a5, 1364 +; RV32IM-NEXT: xor a3, a1, a3 +; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: xor a3, a3, a4 +; RV32IM-NEXT: lw a4, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a3, a2, a6 +; RV32IM-NEXT: srli 
a4, a2, 8 +; RV32IM-NEXT: xor a0, a2, a0 +; RV32IM-NEXT: slli a3, a3, 8 +; RV32IM-NEXT: and a2, a4, a6 +; RV32IM-NEXT: srli a0, a0, 24 +; RV32IM-NEXT: or a1, a1, a3 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 4 +; RV32IM-NEXT: and a0, a0, s1 +; RV32IM-NEXT: and a1, a1, s1 +; RV32IM-NEXT: slli a0, a0, 4 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 2 +; RV32IM-NEXT: and a0, a0, s3 +; RV32IM-NEXT: and a1, a1, s3 +; RV32IM-NEXT: slli a0, a0, 2 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a1, a0, 1 +; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a5 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a0, a0, 1 +; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s2, 128(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s3, 124(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s4, 120(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s5, 116(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s6, 112(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s7, 108(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s8, 104(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s9, 100(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s10, 96(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s11, 92(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 144 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: clmulh_i32: +; RV64IM: # %bb.0: +; RV64IM-NEXT: addi sp, sp, -512 +; RV64IM-NEXT: sd ra, 504(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s0, 496(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s1, 488(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s2, 480(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s3, 472(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s4, 464(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s5, 456(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s6, 448(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s7, 440(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s8, 432(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s9, 424(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 416(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 408(sp) # 8-byte Folded Spill +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli t0, a0, 8 +; RV64IM-NEXT: li s1, 255 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui a4, 16 +; RV64IM-NEXT: srli t2, a0, 56 +; RV64IM-NEXT: srliw t3, a0, 24 +; RV64IM-NEXT: slli a2, a0, 56 +; RV64IM-NEXT: lui t4, 61681 +; RV64IM-NEXT: lui s0, 209715 +; RV64IM-NEXT: lui s9, 349525 +; RV64IM-NEXT: srli s7, a1, 24 +; RV64IM-NEXT: srli s5, a1, 8 +; RV64IM-NEXT: srli t5, a1, 40 +; RV64IM-NEXT: srli a7, a1, 56 +; RV64IM-NEXT: srliw ra, a1, 24 +; RV64IM-NEXT: slli a6, a1, 56 +; RV64IM-NEXT: li t1, 1 +; RV64IM-NEXT: lui s11, 128 +; RV64IM-NEXT: lui s2, 256 +; RV64IM-NEXT: lui s3, 4096 +; RV64IM-NEXT: lui t6, 8192 +; RV64IM-NEXT: lui s8, 4080 +; RV64IM-NEXT: and a3, a3, s8 +; RV64IM-NEXT: slli s1, s1, 24 +; RV64IM-NEXT: addi s10, a4, -256 +; RV64IM-NEXT: and t0, t0, s1 +; RV64IM-NEXT: sd s1, 400(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a3, t0, a3 +; RV64IM-NEXT: and t0, a0, s8 +; RV64IM-NEXT: slli t3, t3, 32 +; RV64IM-NEXT: addi s4, t4, -241 +; RV64IM-NEXT: addi s6, s0, 819 +; RV64IM-NEXT: addi a4, s9, 1365 +; RV64IM-NEXT: and t4, s7, s8 +; RV64IM-NEXT: and a5, a5, s10 +; RV64IM-NEXT: or a5, a5, t2 +; RV64IM-NEXT: and t2, a1, s8 +; RV64IM-NEXT: slli s0, ra, 32 +; RV64IM-NEXT: slli 
t0, t0, 24 +; RV64IM-NEXT: or s9, t0, t3 +; RV64IM-NEXT: slli t0, s4, 32 +; RV64IM-NEXT: add s4, s4, t0 +; RV64IM-NEXT: slli t0, s6, 32 +; RV64IM-NEXT: add s6, s6, t0 +; RV64IM-NEXT: slli s7, t1, 11 +; RV64IM-NEXT: and t0, s5, s1 +; RV64IM-NEXT: or t0, t0, t4 +; RV64IM-NEXT: slli t4, t1, 32 +; RV64IM-NEXT: and t3, t5, s10 +; RV64IM-NEXT: or a7, t3, a7 +; RV64IM-NEXT: slli ra, t1, 33 +; RV64IM-NEXT: slli t2, t2, 24 +; RV64IM-NEXT: or t2, t2, s0 +; RV64IM-NEXT: slli s0, t1, 34 +; RV64IM-NEXT: or a3, a3, a5 +; RV64IM-NEXT: slli s1, t1, 35 +; RV64IM-NEXT: sd s10, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: slli a2, t1, 36 +; RV64IM-NEXT: sd a2, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, t0, a7 +; RV64IM-NEXT: slli a7, t1, 37 +; RV64IM-NEXT: and a1, a1, s10 +; RV64IM-NEXT: slli a1, a1, 40 +; RV64IM-NEXT: or a1, a6, a1 +; RV64IM-NEXT: sd a4, 392(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a5, a4, 32 +; RV64IM-NEXT: add a5, a4, a5 +; RV64IM-NEXT: or a0, a0, s9 +; RV64IM-NEXT: or a1, a1, t2 +; RV64IM-NEXT: or a0, a0, a3 +; RV64IM-NEXT: or a1, a1, a2 +; RV64IM-NEXT: srli a2, a0, 4 +; RV64IM-NEXT: sd s4, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s4 +; RV64IM-NEXT: srli a3, a1, 4 +; RV64IM-NEXT: and a1, a1, s4 +; RV64IM-NEXT: and a2, a2, s4 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: and a3, a3, s4 +; RV64IM-NEXT: slli a1, a1, 4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 2 +; RV64IM-NEXT: sd s6, 376(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s6 +; RV64IM-NEXT: srli a3, a1, 2 +; RV64IM-NEXT: and a1, a1, s6 +; RV64IM-NEXT: and a2, a2, s6 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: and a3, a3, s6 +; RV64IM-NEXT: slli a1, a1, 2 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: srli a2, a0, 1 +; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: srli a3, a1, 1 +; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: and a2, a2, a5 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: and a3, a3, a5 +; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or s5, a3, a1 +; RV64IM-NEXT: andi a1, s5, 2 +; RV64IM-NEXT: andi a2, s5, 1 +; RV64IM-NEXT: andi a3, s5, 4 +; RV64IM-NEXT: andi a5, s5, 8 +; RV64IM-NEXT: andi a6, s5, 16 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 360(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s5, 32 +; RV64IM-NEXT: mul a2, a0, a3 +; RV64IM-NEXT: mul a3, a0, a5 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a2, s5, 256 +; RV64IM-NEXT: mul a3, a0, a6 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a3, a1 +; RV64IM-NEXT: sd a1, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: andi a1, s5, 512 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, t1, 38 +; RV64IM-NEXT: lui a1, 2 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: lui a2, 4 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 40 +; RV64IM-NEXT: and a2, s5, s11 +; RV64IM-NEXT: and a3, s5, s2 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 312(sp) # 8-byte 
Folded Spill +; RV64IM-NEXT: slli a2, t1, 41 +; RV64IM-NEXT: and a3, s5, s3 +; RV64IM-NEXT: and a4, s5, t6 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: xor a3, a3, a4 +; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a3, t1, 48 +; RV64IM-NEXT: and a4, s5, t4 +; RV64IM-NEXT: and a5, s5, ra +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a5, a0, a5 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, t1, 49 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 56 +; RV64IM-NEXT: and a2, s5, a3 +; RV64IM-NEXT: and a3, s5, a4 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: xor a2, a2, a3 +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a2, t1, 57 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a2, s5, a2 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: mul a2, a0, a2 +; RV64IM-NEXT: xor a1, a1, a2 +; RV64IM-NEXT: sd a1, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli ra, t1, 39 +; RV64IM-NEXT: slli a2, t1, 42 +; RV64IM-NEXT: slli a4, t1, 43 +; RV64IM-NEXT: slli s2, t1, 44 +; RV64IM-NEXT: slli s3, t1, 45 +; RV64IM-NEXT: slli s6, t1, 46 +; RV64IM-NEXT: slli a1, t1, 47 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 50 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 51 +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 52 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 53 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 54 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 55 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 58 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 59 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 60 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a1, t1, 61 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t1, t1, 62 +; RV64IM-NEXT: sd t1, 104(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and t1, s5, s7 +; RV64IM-NEXT: lui a3, 1 +; RV64IM-NEXT: and a1, s5, a3 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a3, 8 +; RV64IM-NEXT: and a1, s5, a3 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui a1, 16 +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s9, 32 +; RV64IM-NEXT: and a1, s5, s9 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s11, 64 +; RV64IM-NEXT: and a1, s5, s11 +; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s10, 512 +; RV64IM-NEXT: and a1, s5, s10 +; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui s8, 1024 +; RV64IM-NEXT: and a1, s5, s8 +; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t0, 2048 +; RV64IM-NEXT: and t0, s5, t0 +; RV64IM-NEXT: lui t2, 16384 +; RV64IM-NEXT: and t2, s5, t2 +; RV64IM-NEXT: lui t3, 32768 +; RV64IM-NEXT: and a1, s5, t3 +; RV64IM-NEXT: sd a1, 120(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t4, 65536 +; RV64IM-NEXT: and a1, s5, t4 +; RV64IM-NEXT: sd 
a1, 112(sp) # 8-byte Folded Spill +; RV64IM-NEXT: lui t5, 131072 +; RV64IM-NEXT: and a5, s5, t5 +; RV64IM-NEXT: lui t6, 262144 +; RV64IM-NEXT: and a6, s5, t6 +; RV64IM-NEXT: and s11, s5, s0 +; RV64IM-NEXT: and t5, s5, s1 +; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t6, s5, a1 +; RV64IM-NEXT: and a1, s5, a7 +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s4 +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and ra, s5, ra +; RV64IM-NEXT: and a1, s5, a2 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a4 +; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s2 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s3 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s6 +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s4, s5, a1 +; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s6, s5, a1 +; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s7, s5, a1 +; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s8, s5, a1 +; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s9, s5, a1 +; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s10, s5, a1 +; RV64IM-NEXT: andi a1, s5, 64 +; RV64IM-NEXT: andi a2, s5, 128 +; RV64IM-NEXT: andi a3, s5, 1024 +; RV64IM-NEXT: srliw a4, s5, 31 +; RV64IM-NEXT: srli t3, s5, 63 +; RV64IM-NEXT: mul s2, a0, a1 +; RV64IM-NEXT: mul a1, a0, a2 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s0, a0, a3 +; RV64IM-NEXT: mul a1, a0, t1 +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t4, a0, a1 +; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s3, a0, a1 +; RV64IM-NEXT: mul a1, a0, t0 +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: 
mul a7, a0, t2 +; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s1, a0, a1 +; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a5 +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, a6 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli a4, a4, 31 +; RV64IM-NEXT: mul a5, a0, s11 +; RV64IM-NEXT: mul t2, a0, t5 +; RV64IM-NEXT: mul s11, a0, t6 +; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul a1, a0, ra +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul s5, a0, a1 +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: ld a3, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a3, a0, a3 +; RV64IM-NEXT: ld t0, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, t0 +; RV64IM-NEXT: ld t6, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t6, a0, t6 +; RV64IM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul ra, a0, ra +; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: mul s6, a0, s6 +; RV64IM-NEXT: mul s7, a0, s7 +; RV64IM-NEXT: mul s8, a0, s8 +; RV64IM-NEXT: mul s9, a0, s9 +; RV64IM-NEXT: mul s10, a0, s10 +; RV64IM-NEXT: slli t3, t3, 63 +; RV64IM-NEXT: mul a4, a0, a4 +; RV64IM-NEXT: mul a0, a0, t3 +; RV64IM-NEXT: sd a0, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld t3, 360(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, a0, s2 +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, a0, s0 +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, a0, t1 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a0, a7 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a0, a5 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, a0, s6 +; RV64IM-NEXT: xor t3, t3, s2 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, a0 +; RV64IM-NEXT: ld a0, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, t4, a0 +; RV64IM-NEXT: xor t1, t1, s3 +; RV64IM-NEXT: xor a7, a7, s1 +; RV64IM-NEXT: xor a5, a5, t2 +; RV64IM-NEXT: xor a2, a2, a6 +; RV64IM-NEXT: xor a1, a1, a3 +; RV64IM-NEXT: xor a3, s6, s7 +; 
RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, t3, a0 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s0, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t4, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: xor a5, a5, s11 +; RV64IM-NEXT: xor a2, a2, t5 +; RV64IM-NEXT: xor a1, a1, t0 +; RV64IM-NEXT: xor a3, a3, s8 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t3, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: xor a2, a2, s5 +; RV64IM-NEXT: xor a1, a1, t6 +; RV64IM-NEXT: xor a3, a3, s9 +; RV64IM-NEXT: xor t2, a6, t2 +; RV64IM-NEXT: xor t0, t2, t0 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, ra +; RV64IM-NEXT: xor a3, a3, s10 +; RV64IM-NEXT: xor t0, t0, t1 +; RV64IM-NEXT: xor a4, a7, a4 +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a7, %hi(.LCPI18_0) +; RV64IM-NEXT: ld a7, %lo(.LCPI18_0)(a7) +; RV64IM-NEXT: slli a6, a6, 56 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a0, a3, a0 +; RV64IM-NEXT: ld t1, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, t0, t1 +; RV64IM-NEXT: xor a4, t0, a4 +; RV64IM-NEXT: slli a3, a3, 40 +; RV64IM-NEXT: xor a4, a4, a5 +; RV64IM-NEXT: or a3, a6, a3 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a5, a4, t0 +; RV64IM-NEXT: xor a2, a4, a2 +; RV64IM-NEXT: srli a4, a4, 8 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: xor a1, a2, a1 +; RV64IM-NEXT: ld a6, 400(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a4, a4, a6 +; RV64IM-NEXT: srli a2, a2, 24 +; RV64IM-NEXT: srliw a6, a1, 24 +; RV64IM-NEXT: and a2, a2, t0 +; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: slli a6, a6, 32 +; RV64IM-NEXT: or a2, a4, a2 +; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a4, a5, a6 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: or a3, a3, a4 +; RV64IM-NEXT: or a0, a2, a0 +; RV64IM-NEXT: or a0, a3, a0 +; RV64IM-NEXT: srli a1, a0, 4 +; RV64IM-NEXT: ld a2, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 4 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 2 +; RV64IM-NEXT: ld a2, 376(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: slli a0, a0, 2 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: ld a2, 392(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a0, a0, a2 +; RV64IM-NEXT: and a1, a1, a7 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 31 +; RV64IM-NEXT: srli a0, a0, 32 +; RV64IM-NEXT: ld ra, 504(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s0, 496(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s1, 488(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s2, 480(sp) # 8-byte Folded Reload 
+; RV64IM-NEXT: ld s3, 472(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s4, 464(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s5, 456(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s6, 448(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s7, 440(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s8, 432(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s9, 424(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s10, 416(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld s11, 408(sp) # 8-byte Folded Reload +; RV64IM-NEXT: addi sp, sp, 512 +; RV64IM-NEXT: ret + %a.ext = zext i32 %a to i64 + %b.ext = zext i32 %b to i64 + %clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext) + %res.ext = lshr i64 %clmul, 32 + %res = trunc i64 %res.ext to i32 + ret i32 %res +} + +define i4 @clmulh_constfold_i4() nounwind { +; CHECK-LABEL: clmulh_constfold_i4: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret + %clmul = call i8 @llvm.clmul.i8(i8 1, i8 2) + %res.ext = lshr i8 %clmul, 4 + %res = trunc i8 %res.ext to i4 + ret i4 %res +} + +define i16 @clmulh_constfold_i16() nounwind { +; CHECK-LABEL: clmulh_constfold_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 11 +; CHECK-NEXT: addi a0, a0, -1366 +; CHECK-NEXT: ret + %clmul = call i32 @llvm.clmul.i16(i32 -2, i32 -1) + %res.ext = lshr i32 %clmul, 16 + %res = trunc i32 %res.ext to i16 + ret i16 %res +} From 26e6706998dd0de2a295649f0996dcaf39f929e0 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 13:56:27 +0000 Subject: [PATCH 05/13] [ISel] Strip bad tests --- llvm/test/CodeGen/RISCV/clmul.ll | 46 -------------------------------- 1 file changed, 46 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll index 429d34a0f9851..8961e630700f3 100644 --- a/llvm/test/CodeGen/RISCV/clmul.ll +++ b/llvm/test/CodeGen/RISCV/clmul.ll @@ -7334,29 +7334,6 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ret i32 %res } -define i4 @clmulr_constfold_i4() nounwind { -; CHECK-LABEL: clmulr_constfold_i4: -; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: ret - %clmul = call i8 @llvm.clmul.i8(i8 1, i8 2) - %res.ext = lshr i8 %clmul, 3 - %res = trunc i8 %res.ext to i4 - ret i4 %res -} - -define i16 @clmulr_constfold_i16() nounwind { -; CHECK-LABEL: clmulr_constfold_i16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 5 -; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: ret - %clmul = call i32 @llvm.clmul.i16(i32 -2, i32 -1) - %res.ext = lshr i32 %clmul, 15 - %res = trunc i32 %res.ext to i16 - ret i16 %res -} - define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV32IM-LABEL: clmulh_i4: ; RV32IM: # %bb.0: @@ -11483,26 +11460,3 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { %res = trunc i64 %res.ext to i32 ret i32 %res } - -define i4 @clmulh_constfold_i4() nounwind { -; CHECK-LABEL: clmulh_constfold_i4: -; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: ret - %clmul = call i8 @llvm.clmul.i8(i8 1, i8 2) - %res.ext = lshr i8 %clmul, 4 - %res = trunc i8 %res.ext to i4 - ret i4 %res -} - -define i16 @clmulh_constfold_i16() nounwind { -; CHECK-LABEL: clmulh_constfold_i16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 11 -; CHECK-NEXT: addi a0, a0, -1366 -; CHECK-NEXT: ret - %clmul = call i32 @llvm.clmul.i16(i32 -2, i32 -1) - %res.ext = lshr i32 %clmul, 16 - %res = trunc i32 %res.ext to i16 - ret i16 %res -} From c6141667472a4049b4ff5f6e679733ac573cca59 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 15:26:44 +0000 Subject: [PATCH 06/13] [ISel] Address pfusik's review, fixup test --- llvm/docs/LangRef.rst 
| 2 +- llvm/include/llvm/CodeGen/SDPatternMatch.h | 4 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 20 +- llvm/test/CodeGen/RISCV/clmul.ll | 4636 ++++++++--------- 4 files changed, 2330 insertions(+), 2332 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 4fdca9c1a4dbc..1000693f8261e 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -18422,7 +18422,7 @@ Semantics: """""""""" The '``llvm.clmul``' intrinsic computes carry-less multiply of its arguments, -which is the result of applying the standard Eucledian multiplication algorithm, +which is the result of applying the standard Euclidean multiplication algorithm, where all of the additions are replaced with XORs, and returns the low-bits. The vector variants operate lane-wise. diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 445e025861fd2..de5b20d417ed9 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -920,8 +920,8 @@ inline BinaryOpc_match m_Rotr(const LHS &L, const RHS &R) { } template -inline BinaryOpc_match m_Clmul(const LHS &L, const RHS &R) { - return BinaryOpc_match(ISD::CLMUL, L, R); +inline BinaryOpc_match m_Clmul(const LHS &L, const RHS &R) { + return BinaryOpc_match(ISD::CLMUL, L, R); } template diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0014bcf60c0e6..eda832e981c64 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8314,16 +8314,16 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const { isOperationLegalOrCustomOrPromote(Opcode, VT.getVectorElementType())) return DAG.UnrollVectorOp(Node); - SDValue Res = DAG.getConstant(0, DL, VT); switch (Opcode) { case ISD::CLMUL: { + SDValue Res = DAG.getConstant(0, DL, VT); for (unsigned I = 0; I < BW; ++I) { SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT); SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask); SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, X, YMasked); Res = DAG.getNode(ISD::XOR, DL, VT, Res, Mul); } - break; + return Res; } case ISD::CLMULR: case ISD::CLMULH: { @@ -8335,12 +8335,11 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const { SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X); SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y); SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev); - Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul); - Res = Opcode == ISD::CLMULR - ? Res - : DAG.getNode(ISD::SRL, DL, VT, Res, - DAG.getShiftAmountConstant(1, VT, DL)); - break; + SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul); + if (Opcode == ISD::CLMULR) + Res = DAG.getNode(ISD::SRL, DL, VT, Res, + DAG.getShiftAmountConstant(1, VT, DL)); + return Res; } SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X); SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y); @@ -8348,11 +8347,10 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const { unsigned ShtAmt = Opcode == ISD::CLMULR ? BW - 1 : BW; SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul, DAG.getShiftAmountConstant(ShtAmt, ExtVT, DL)); - Res = DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits); - break; + return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits); } } - return Res; + llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH"); } // TODO: Merge with expandFunnelShift. 
diff --git a/llvm/test/CodeGen/RISCV/clmul.ll b/llvm/test/CodeGen/RISCV/clmul.ll index 8961e630700f3..f0f34563e4c87 100644 --- a/llvm/test/CodeGen/RISCV/clmul.ll +++ b/llvm/test/CodeGen/RISCV/clmul.ll @@ -3502,8 +3502,9 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV32IM-NEXT: andi a1, a0, 5 ; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: slli a1, a1, 1 -; RV32IM-NEXT: andi a0, a0, 5 +; RV32IM-NEXT: andi a0, a0, 20 ; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -3536,83 +3537,82 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a4, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a7, a0, 8 ; RV64IM-NEXT: li s4, 255 -; RV64IM-NEXT: srli a5, a0, 40 -; RV64IM-NEXT: lui s8, 16 +; RV64IM-NEXT: srli a4, a0, 40 +; RV64IM-NEXT: lui s10, 16 ; RV64IM-NEXT: srli t1, a0, 56 -; RV64IM-NEXT: srliw t3, a0, 24 -; RV64IM-NEXT: slli t4, a0, 56 +; RV64IM-NEXT: srliw t4, a0, 24 +; RV64IM-NEXT: slli a5, a0, 56 ; RV64IM-NEXT: lui s3, 61681 ; RV64IM-NEXT: lui t5, 209715 ; RV64IM-NEXT: lui s6, 349525 ; RV64IM-NEXT: srli s9, a1, 24 ; RV64IM-NEXT: srli s0, a1, 8 -; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srli ra, a1, 40 ; RV64IM-NEXT: srli t2, a1, 56 ; RV64IM-NEXT: srliw s11, a1, 24 -; RV64IM-NEXT: slli a3, a1, 56 +; RV64IM-NEXT: slli a6, a1, 56 ; RV64IM-NEXT: li t0, 1 ; RV64IM-NEXT: lui s1, 128 ; RV64IM-NEXT: lui s2, 256 ; RV64IM-NEXT: lui t6, 4096 ; RV64IM-NEXT: lui s5, 8192 ; RV64IM-NEXT: lui s7, 4080 -; RV64IM-NEXT: and a2, a4, s7 -; RV64IM-NEXT: slli ra, s4, 24 -; RV64IM-NEXT: addi s10, s8, -256 -; RV64IM-NEXT: and a4, a6, ra -; RV64IM-NEXT: sd ra, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, a4, a2 -; RV64IM-NEXT: and a4, a0, s7 -; RV64IM-NEXT: slli t3, t3, 32 +; RV64IM-NEXT: and a2, a3, s7 +; RV64IM-NEXT: slli t3, s4, 24 +; RV64IM-NEXT: addi s8, s10, -256 +; RV64IM-NEXT: and a3, a7, t3 +; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a3, a0, s7 +; RV64IM-NEXT: slli t4, t4, 32 ; RV64IM-NEXT: addi s3, s3, -241 ; RV64IM-NEXT: addi s4, t5, 819 ; RV64IM-NEXT: addi s6, s6, 1365 -; RV64IM-NEXT: and a6, s9, s7 -; RV64IM-NEXT: and a5, a5, s10 -; RV64IM-NEXT: or a5, a5, t1 +; RV64IM-NEXT: and a7, s9, s7 +; RV64IM-NEXT: and a4, a4, s8 +; RV64IM-NEXT: or a4, a4, t1 ; RV64IM-NEXT: and t1, a1, s7 ; RV64IM-NEXT: slli t5, s11, 32 -; RV64IM-NEXT: slli a4, a4, 24 -; RV64IM-NEXT: or s9, a4, t3 -; RV64IM-NEXT: slli a4, s3, 32 -; RV64IM-NEXT: add s3, s3, a4 -; RV64IM-NEXT: slli a4, s4, 32 -; RV64IM-NEXT: add s4, s4, a4 -; RV64IM-NEXT: slli a4, s6, 32 -; RV64IM-NEXT: add s6, s6, a4 -; RV64IM-NEXT: slli t3, t0, 11 -; RV64IM-NEXT: and a4, s0, ra -; RV64IM-NEXT: or a4, a4, a6 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: or s9, a3, t4 +; RV64IM-NEXT: slli a3, s3, 32 +; RV64IM-NEXT: add s3, s3, a3 +; RV64IM-NEXT: slli a3, s4, 32 +; RV64IM-NEXT: add s4, s4, a3 +; RV64IM-NEXT: slli a3, s6, 32 +; RV64IM-NEXT: add s6, s6, a3 +; RV64IM-NEXT: slli t4, t0, 11 +; RV64IM-NEXT: and a3, s0, t3 +; RV64IM-NEXT: or a3, a3, a7 ; RV64IM-NEXT: slli s11, t0, 32 -; RV64IM-NEXT: and a6, a7, s10 -; RV64IM-NEXT: or a6, a6, t2 +; RV64IM-NEXT: and a7, ra, s8 +; RV64IM-NEXT: or a7, a7, t2 ; 
RV64IM-NEXT: slli ra, t0, 33 ; RV64IM-NEXT: slli t1, t1, 24 -; RV64IM-NEXT: or a7, t1, t5 +; RV64IM-NEXT: or t1, t1, t5 ; RV64IM-NEXT: slli s0, t0, 34 -; RV64IM-NEXT: or a2, a2, a5 -; RV64IM-NEXT: slli a5, t0, 35 -; RV64IM-NEXT: sd a5, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s10, 344(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: or a2, a2, a4 +; RV64IM-NEXT: slli a4, t0, 35 +; RV64IM-NEXT: sd a4, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s8 +; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: or a0, t4, a0 -; RV64IM-NEXT: slli a5, t0, 36 -; RV64IM-NEXT: sd a5, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a4, a4, a6 -; RV64IM-NEXT: slli a6, t0, 37 -; RV64IM-NEXT: and a1, a1, s10 +; RV64IM-NEXT: or a0, a5, a0 +; RV64IM-NEXT: slli a4, t0, 36 +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli a7, t0, 37 +; RV64IM-NEXT: and a1, a1, s8 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a3, a1 -; RV64IM-NEXT: slli a3, t0, 38 -; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a1, a6, a1 +; RV64IM-NEXT: slli a6, t0, 38 ; RV64IM-NEXT: or a0, a0, s9 -; RV64IM-NEXT: or a1, a1, a7 +; RV64IM-NEXT: or a1, a1, t1 ; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: or a1, a1, a3 ; RV64IM-NEXT: srli a2, a0, 4 ; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a0, a0, s3 @@ -3669,7 +3669,7 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: xor a1, a2, a1 ; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a7, t0, 39 +; RV64IM-NEXT: slli s3, t0, 39 ; RV64IM-NEXT: lui a1, 2 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: lui a2, 4 @@ -3684,51 +3684,52 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 41 ; RV64IM-NEXT: and a3, s6, t6 ; RV64IM-NEXT: and a4, s6, s5 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a3, t0, 48 ; RV64IM-NEXT: and a4, s6, s11 ; RV64IM-NEXT: and a5, s6, ra ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a5, a0, a5 ; RV64IM-NEXT: xor a4, a4, a5 -; RV64IM-NEXT: sd a4, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, t0, 49 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: and a2, s6, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 56 ; RV64IM-NEXT: and a2, s6, a3 ; RV64IM-NEXT: and a3, s6, a4 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 57 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: and a2, s6, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 42 
; RV64IM-NEXT: slli ra, t0, 43 ; RV64IM-NEXT: slli a4, t0, 44 ; RV64IM-NEXT: slli t6, t0, 45 ; RV64IM-NEXT: slli s1, t0, 46 ; RV64IM-NEXT: slli s2, t0, 47 -; RV64IM-NEXT: slli s3, t0, 50 -; RV64IM-NEXT: slli s4, t0, 51 -; RV64IM-NEXT: slli s5, t0, 52 +; RV64IM-NEXT: slli s4, t0, 50 +; RV64IM-NEXT: slli s5, t0, 51 +; RV64IM-NEXT: slli a1, t0, 52 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 53 ; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 54 @@ -3745,7 +3746,7 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli t0, t0, 62 ; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: and a1, s6, t4 ; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui a3, 1 ; RV64IM-NEXT: and a1, s6, a3 @@ -3753,7 +3754,7 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: lui a3, 8 ; RV64IM-NEXT: and a1, s6, a3 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: and a1, s6, s10 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui s9, 32 ; RV64IM-NEXT: and a1, s6, s9 @@ -3785,16 +3786,15 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: and s11, s6, s0 ; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a6 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: and a1, s6, s3 ; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a2 ; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill @@ -3807,10 +3807,11 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, s2 ; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: and a1, s6, s4 ; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and s0, s6, s4 -; RV64IM-NEXT: and s1, s6, s5 +; RV64IM-NEXT: and s0, s6, s5 +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, s6, a1 ; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and s2, s6, a1 ; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload @@ -3834,14 +3835,14 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: srli s6, s6, 63 ; RV64IM-NEXT: mul t4, a0, a1 ; RV64IM-NEXT: mul a1, a0, a2 -; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul t3, a0, a3 ; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t1, a0, a1 ; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload @@ -3849,17 +3850,17 
@@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a7, a0, a1 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t6, a0, a1 ; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a6, a0, a1 ; RV64IM-NEXT: mul t5, a0, t2 @@ -3867,25 +3868,25 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a5 -; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, t0 -; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, a4, 31 ; RV64IM-NEXT: mul a3, a0, s11 -; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t0, a0, a1 -; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a2, a0, a1 ; RV64IM-NEXT: mul a5, a0, ra @@ -3897,7 +3898,7 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul ra, a0, a1 ; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul s0, a0, s0 @@ -3905,7 +3906,6 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul s2, a0, s2 ; RV64IM-NEXT: mul s3, a0, s3 ; RV64IM-NEXT: mul s4, a0, s4 -; RV64IM-NEXT: sd s4, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s5, a0, s5 ; RV64IM-NEXT: mul s7, a0, s7 ; RV64IM-NEXT: mul s8, a0, s8 @@ -3914,80 +3914,83 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: slli s6, s6, 63 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a0, a0, s6 +; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s6, s6, s4 -; RV64IM-NEXT: 
ld s4, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, s4, t4 -; RV64IM-NEXT: ld s4, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, s4, t3 -; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, s4, t1 -; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, s4, a7 -; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, s4, a6 -; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, s4, a3 -; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, s4, a2 -; RV64IM-NEXT: ld s4, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, s4, a1 -; RV64IM-NEXT: ld s4, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, s4, s5 +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, a0 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, a0, t3 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, a0, t1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a0, a7 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a0, a6 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a0, a3 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, a0, s5 ; RV64IM-NEXT: xor t4, s6, t4 -; RV64IM-NEXT: ld s4, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, s4 -; RV64IM-NEXT: ld s4, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, s4 +; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 ; RV64IM-NEXT: xor a7, a7, t6 ; RV64IM-NEXT: xor a6, a6, t5 ; RV64IM-NEXT: xor a3, a3, t0 ; RV64IM-NEXT: xor a2, a2, a5 ; RV64IM-NEXT: xor a1, a1, s0 ; RV64IM-NEXT: xor a5, s5, s7 -; RV64IM-NEXT: ld t0, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t4, t0 -; RV64IM-NEXT: ld t4, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, t4 -; RV64IM-NEXT: ld t4, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, t4 -; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, t4 -; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, t4 -; RV64IM-NEXT: ld t4, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t4 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 ; RV64IM-NEXT: xor a2, a2, t2 ; RV64IM-NEXT: xor a1, a1, s1 ; RV64IM-NEXT: xor a5, a5, s8 -; RV64IM-NEXT: ld t2, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, t2 -; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, t2 -; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte 
Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 ; RV64IM-NEXT: xor a2, a2, s11 ; RV64IM-NEXT: xor a1, a1, s2 ; RV64IM-NEXT: xor a5, a5, s9 ; RV64IM-NEXT: xor t2, t0, t3 ; RV64IM-NEXT: xor t1, t2, t1 -; RV64IM-NEXT: ld t2, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, t2 -; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 ; RV64IM-NEXT: xor a2, a2, ra ; RV64IM-NEXT: xor a1, a1, s3 ; RV64IM-NEXT: xor a5, a5, s10 ; RV64IM-NEXT: xor a7, t1, a7 ; RV64IM-NEXT: xor a4, a6, a4 -; RV64IM-NEXT: ld a6, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a6 -; RV64IM-NEXT: ld a6, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a6 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a6, %hi(.LCPI7_0) +; RV64IM-NEXT: ld a6, %lo(.LCPI7_0)(a6) ; RV64IM-NEXT: slli t0, t0, 56 -; RV64IM-NEXT: ld a6, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a0, a5, a0 ; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a5, a7, t1 @@ -3995,24 +3998,24 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: slli a5, a5, 40 ; RV64IM-NEXT: xor a3, a4, a3 ; RV64IM-NEXT: or a4, t0, a5 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a5, a3, a7 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a5, a3, t0 ; RV64IM-NEXT: xor a2, a3, a2 ; RV64IM-NEXT: srli a3, a3, 8 ; RV64IM-NEXT: slli a5, a5, 24 ; RV64IM-NEXT: xor a1, a2, a1 -; RV64IM-NEXT: ld a6, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a3, a3, a6 +; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a7 ; RV64IM-NEXT: srli a2, a2, 24 -; RV64IM-NEXT: srliw a6, a1, 24 -; RV64IM-NEXT: and a2, a2, a7 -; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srliw a7, a1, 24 +; RV64IM-NEXT: and a2, a2, t0 +; RV64IM-NEXT: srli t0, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: slli a6, a6, 32 +; RV64IM-NEXT: slli a7, a7, 32 ; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a1, a7, t1 +; RV64IM-NEXT: and a1, t0, t1 ; RV64IM-NEXT: srli a0, a0, 56 -; RV64IM-NEXT: or a3, a5, a6 +; RV64IM-NEXT: or a3, a5, a7 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: or a3, a4, a3 ; RV64IM-NEXT: or a0, a2, a0 @@ -4029,11 +4032,13 @@ define i4 @clmulr_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: slli a0, a0, 2 ; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: andi a1, a0, 5 -; RV64IM-NEXT: srli a0, a0, 1 -; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: srli a1, a0, 1 ; RV64IM-NEXT: andi a0, a0, 5 -; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: and a1, a1, a6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 59 +; RV64IM-NEXT: srli a0, a0, 60 ; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload @@ -4078,250 +4083,253 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV32IM-NEXT: lui a3, 16 ; RV32IM-NEXT: srli t1, a0, 24 ; 
RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui t3, 61681 -; RV32IM-NEXT: lui t5, 209715 -; RV32IM-NEXT: lui t6, 349525 +; RV32IM-NEXT: lui s1, 61681 +; RV32IM-NEXT: lui s3, 209715 +; RV32IM-NEXT: lui a6, 349525 ; RV32IM-NEXT: srli t4, a1, 8 -; RV32IM-NEXT: srli a4, a1, 24 -; RV32IM-NEXT: slli a5, a1, 24 -; RV32IM-NEXT: li s7, 1 +; RV32IM-NEXT: srli t6, a1, 24 +; RV32IM-NEXT: slli a4, a1, 24 +; RV32IM-NEXT: li t3, 1 +; RV32IM-NEXT: lui s11, 2 ; RV32IM-NEXT: lui t2, 4 -; RV32IM-NEXT: lui s0, 8 -; RV32IM-NEXT: lui s1, 32 -; RV32IM-NEXT: lui s2, 64 -; RV32IM-NEXT: lui s3, 128 +; RV32IM-NEXT: lui s10, 8 +; RV32IM-NEXT: lui t5, 32 +; RV32IM-NEXT: lui s0, 64 +; RV32IM-NEXT: lui s2, 128 ; RV32IM-NEXT: lui s4, 256 -; RV32IM-NEXT: lui s8, 512 -; RV32IM-NEXT: lui a7, 1024 -; RV32IM-NEXT: lui s9, 2048 -; RV32IM-NEXT: lui s10, 4096 -; RV32IM-NEXT: lui s11, 8192 +; RV32IM-NEXT: lui s5, 512 +; RV32IM-NEXT: lui s6, 1024 +; RV32IM-NEXT: lui s7, 2048 +; RV32IM-NEXT: lui s8, 4096 +; RV32IM-NEXT: lui s9, 8192 ; RV32IM-NEXT: lui ra, 16384 -; RV32IM-NEXT: addi s5, a3, -256 -; RV32IM-NEXT: and t0, t0, s5 +; RV32IM-NEXT: addi a3, a3, -256 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: and t0, t0, a3 ; RV32IM-NEXT: or t1, t0, t1 -; RV32IM-NEXT: lui a6, 32768 -; RV32IM-NEXT: and t4, t4, s5 -; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: lui a7, 32768 +; RV32IM-NEXT: and t4, t4, a3 +; RV32IM-NEXT: or t6, t4, t6 ; RV32IM-NEXT: lui t0, 65536 -; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: and a0, a0, a3 +; RV32IM-NEXT: mv t4, a3 +; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill ; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: lui a2, 131072 -; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: or a2, a2, a0 +; RV32IM-NEXT: lui a3, 131072 +; RV32IM-NEXT: and a1, a1, t4 ; RV32IM-NEXT: slli a1, a1, 8 -; RV32IM-NEXT: or t4, a5, a1 +; RV32IM-NEXT: or a0, a4, a1 ; RV32IM-NEXT: lui a1, 262144 -; RV32IM-NEXT: or a0, a0, t1 -; RV32IM-NEXT: lui a5, 524288 -; RV32IM-NEXT: addi t3, t3, -241 -; RV32IM-NEXT: addi t5, t5, 819 -; RV32IM-NEXT: addi t6, t6, 1365 -; RV32IM-NEXT: slli s7, s7, 11 -; RV32IM-NEXT: or a4, t4, a4 -; RV32IM-NEXT: srli t4, a0, 4 -; RV32IM-NEXT: and a0, a0, t3 -; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: addi s1, s1, -241 +; RV32IM-NEXT: addi s3, s3, 819 +; RV32IM-NEXT: or a2, a2, t1 +; RV32IM-NEXT: addi a4, a6, 1365 +; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: or a0, a0, t6 +; RV32IM-NEXT: srli a6, a2, 4 +; RV32IM-NEXT: and a2, a2, s1 +; RV32IM-NEXT: and a6, a6, s1 +; RV32IM-NEXT: slli a2, a2, 4 +; RV32IM-NEXT: or a2, a6, a2 +; RV32IM-NEXT: srli a6, a0, 4 +; RV32IM-NEXT: and a0, a0, s1 +; RV32IM-NEXT: and a6, a6, s1 ; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, t4, a0 -; RV32IM-NEXT: srli t4, a4, 4 -; RV32IM-NEXT: and a4, a4, t3 -; RV32IM-NEXT: and t4, t4, t3 -; RV32IM-NEXT: slli a4, a4, 4 -; RV32IM-NEXT: or a4, t4, a4 -; RV32IM-NEXT: srli t4, a0, 2 -; RV32IM-NEXT: and a0, a0, t5 -; RV32IM-NEXT: and t4, t4, t5 +; RV32IM-NEXT: or a0, a6, a0 +; RV32IM-NEXT: srli a6, a2, 2 +; RV32IM-NEXT: and a2, a2, s3 +; RV32IM-NEXT: and a6, a6, s3 +; RV32IM-NEXT: slli a2, a2, 2 +; RV32IM-NEXT: or a2, a6, a2 +; RV32IM-NEXT: srli a6, a0, 2 +; RV32IM-NEXT: and a0, a0, s3 +; RV32IM-NEXT: and a6, a6, s3 ; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, t4, a0 -; RV32IM-NEXT: srli t4, a4, 2 -; RV32IM-NEXT: and a4, a4, t5 -; RV32IM-NEXT: and t4, t4, t5 -; RV32IM-NEXT: slli a4, a4, 2 -; RV32IM-NEXT: or t4, t4, a4 -; RV32IM-NEXT: srli a4, a0, 1 -; RV32IM-NEXT: and a0, a0, t6 -; 
RV32IM-NEXT: and a4, a4, t6 +; RV32IM-NEXT: or a0, a6, a0 +; RV32IM-NEXT: srli a6, a2, 1 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: and a6, a6, a4 +; RV32IM-NEXT: slli a2, a2, 1 +; RV32IM-NEXT: or a6, a6, a2 +; RV32IM-NEXT: srli a2, a0, 1 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and a2, a2, a4 ; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a4, a4, a0 -; RV32IM-NEXT: srli a0, t4, 1 -; RV32IM-NEXT: and t4, t4, t6 -; RV32IM-NEXT: and a0, a0, t6 -; RV32IM-NEXT: slli t4, t4, 1 -; RV32IM-NEXT: or a0, a0, t4 -; RV32IM-NEXT: andi t4, a0, 2 -; RV32IM-NEXT: and s6, a0, s7 -; RV32IM-NEXT: lui t1, 1 -; RV32IM-NEXT: and t1, a0, t1 -; RV32IM-NEXT: sw t1, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui t1, 2 -; RV32IM-NEXT: and t1, a0, t1 -; RV32IM-NEXT: sw t1, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t1, a0, t2 -; RV32IM-NEXT: sw t1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: slli t3, t3, 11 +; RV32IM-NEXT: and t3, a0, t3 +; RV32IM-NEXT: lui a4, 1 +; RV32IM-NEXT: and t4, a0, a4 +; RV32IM-NEXT: and s11, a0, s11 +; RV32IM-NEXT: and a4, a0, t2 +; RV32IM-NEXT: sw a4, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s10 +; RV32IM-NEXT: sw a4, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, t5 +; RV32IM-NEXT: sw a4, 64(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: and a4, a0, s2 +; RV32IM-NEXT: sw a4, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s4, a0, s4 +; RV32IM-NEXT: and a4, a0, s5 +; RV32IM-NEXT: sw a4, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s6 +; RV32IM-NEXT: sw a4, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s7 +; RV32IM-NEXT: sw a4, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s8 +; RV32IM-NEXT: sw a4, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s9 +; RV32IM-NEXT: sw a4, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, ra +; RV32IM-NEXT: sw a4, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, a7 +; RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, t0 +; RV32IM-NEXT: sw a4, 28(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a3, a0, a3 -; RV32IM-NEXT: sw a3, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s1, a0, s1 -; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, s2 -; RV32IM-NEXT: sw a3, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s3, a0, s3 -; RV32IM-NEXT: and a3, a0, s4 -; RV32IM-NEXT: sw a3, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, s8 -; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, a7 -; RV32IM-NEXT: sw a3, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s9, a0, s9 -; RV32IM-NEXT: and a3, a0, s10 -; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, s11 -; RV32IM-NEXT: sw a3, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, ra -; RV32IM-NEXT: sw a3, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, a6 -; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, t0 -; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, a2 -; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a1, a0, a1 -; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a5, a0, a5 -; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a1, 20(sp) # 4-byte 
Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi ra, a0, 2 ; RV32IM-NEXT: andi a1, a0, 1 ; RV32IM-NEXT: andi a2, a0, 4 ; RV32IM-NEXT: andi a3, a0, 8 -; RV32IM-NEXT: andi a5, a0, 16 -; RV32IM-NEXT: andi a6, a0, 32 +; RV32IM-NEXT: andi a4, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 ; RV32IM-NEXT: andi a7, a0, 64 ; RV32IM-NEXT: andi t0, a0, 128 ; RV32IM-NEXT: andi t1, a0, 256 ; RV32IM-NEXT: andi t2, a0, 512 ; RV32IM-NEXT: andi a0, a0, 1024 -; RV32IM-NEXT: mul t4, a4, t4 -; RV32IM-NEXT: sw t4, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul ra, a4, a1 -; RV32IM-NEXT: mul s11, a4, a2 -; RV32IM-NEXT: mul s8, a4, a3 -; RV32IM-NEXT: mul s7, a4, a5 -; RV32IM-NEXT: mul s4, a4, a6 -; RV32IM-NEXT: mul a1, a4, a7 -; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a4, t0 -; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s2, a4, t1 -; RV32IM-NEXT: mul t2, a4, t2 -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a4, s6 -; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul ra, a6, ra +; RV32IM-NEXT: mul s10, a6, a1 +; RV32IM-NEXT: mul s9, a6, a2 +; RV32IM-NEXT: mul s5, a6, a3 +; RV32IM-NEXT: mul s6, a6, a4 +; RV32IM-NEXT: mul s2, a6, a5 +; RV32IM-NEXT: mul a1, a6, a7 +; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a6, t0 +; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t6, a6, t1 +; RV32IM-NEXT: mul t2, a6, t2 +; RV32IM-NEXT: mul s7, a6, a0 +; RV32IM-NEXT: mul a0, a6, t3 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a6, t4 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t1, a6, s11 ; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t1, a4, a0 -; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a7, a4, a0 -; RV32IM-NEXT: mul s1, a4, s0 +; RV32IM-NEXT: mul a7, a6, a0 ; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t5, a6, a0 ; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s8, a6, a0 ; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a3, a4, s3 +; RV32IM-NEXT: mul a0, a6, a0 +; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a6, s0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a4, a0 +; RV32IM-NEXT: mul a3, a6, a0 +; RV32IM-NEXT: mul a2, a6, s4 ; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a6, a4, a0 +; RV32IM-NEXT: mul a5, a6, a0 ; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t4, a4, a0 -; RV32IM-NEXT: mul s6, a4, s9 +; RV32IM-NEXT: mul t3, a6, a0 ; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a4, a0 +; RV32IM-NEXT: mul s4, a6, a0 ; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a5, a4, a5 -; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload -; 
RV32IM-NEXT: mul t0, a4, t0 -; RV32IM-NEXT: lw s0, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s0, a4, s0 -; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s3, a4, s3 -; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s9, a4, s9 -; RV32IM-NEXT: lw s10, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a4, s10 -; RV32IM-NEXT: lw s10, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor ra, ra, s10 -; RV32IM-NEXT: xor s8, s11, s8 -; RV32IM-NEXT: xor s4, s7, s4 -; RV32IM-NEXT: xor t2, s2, t2 +; RV32IM-NEXT: mul a1, a6, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a6, a0 +; RV32IM-NEXT: lw a4, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a6, a4 +; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a6, t0 +; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a6, t4 +; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a6, s0 +; RV32IM-NEXT: lw s11, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, a6, s11 +; RV32IM-NEXT: sw s11, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a6, a6, s11 +; RV32IM-NEXT: xor s10, s10, ra +; RV32IM-NEXT: xor s5, s9, s5 +; RV32IM-NEXT: xor s2, s6, s2 +; RV32IM-NEXT: xor t2, t6, t2 ; RV32IM-NEXT: xor a7, t1, a7 ; RV32IM-NEXT: xor a2, a3, a2 ; RV32IM-NEXT: xor a0, a1, a0 -; RV32IM-NEXT: xor a1, ra, s8 -; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, s4, a3 -; RV32IM-NEXT: lw t1, 4(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t1, t2, t1 -; RV32IM-NEXT: xor a7, a7, s1 -; RV32IM-NEXT: xor a2, a2, a6 -; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, s10, s5 +; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s2, a3 +; RV32IM-NEXT: xor t1, t2, s7 +; RV32IM-NEXT: xor a7, a7, t5 +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: xor a0, a0, a4 ; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, t1, a3 -; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a7, a5 -; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a4, a7, s8 +; RV32IM-NEXT: xor a2, a2, t3 ; RV32IM-NEXT: xor a0, a0, t0 -; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a1, a1, a6 -; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a3, a6 -; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a2, a2, s6 +; RV32IM-NEXT: lw a5, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: lw a5, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: lw a5, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a2, a2, s4 +; RV32IM-NEXT: xor a0, a0, t4 +; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 ; RV32IM-NEXT: xor a0, a0, s0 -; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a0, a0, s3 +; RV32IM-NEXT: lui a5, 349525 +; RV32IM-NEXT: addi a5, a5, 1364 ; RV32IM-NEXT: xor a3, a1, a3 ; RV32IM-NEXT: slli a1, a1, 24 -; RV32IM-NEXT: xor a3, a3, a5 -; RV32IM-NEXT: xor a0, a0, s9 -; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a3, a3, a4 +; RV32IM-NEXT: lw a4, 80(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a0, a0, a4 -; RV32IM-NEXT: and a3, a2, s5 +; RV32IM-NEXT: xor a2, 
a3, a2 +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a3, a2, a6 ; RV32IM-NEXT: srli a4, a2, 8 ; RV32IM-NEXT: xor a0, a2, a0 ; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, s5 +; RV32IM-NEXT: and a2, a4, a6 ; RV32IM-NEXT: srli a0, a0, 24 ; RV32IM-NEXT: or a1, a1, a3 ; RV32IM-NEXT: or a0, a2, a0 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: and a0, a0, t3 -; RV32IM-NEXT: and a1, a1, t3 +; RV32IM-NEXT: and a0, a0, s1 +; RV32IM-NEXT: and a1, a1, s1 ; RV32IM-NEXT: slli a0, a0, 4 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: and a0, a0, t5 -; RV32IM-NEXT: and a1, a1, t5 +; RV32IM-NEXT: and a0, a0, s3 +; RV32IM-NEXT: and a1, a1, s3 ; RV32IM-NEXT: slli a0, a0, 2 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: and a0, a0, t6 -; RV32IM-NEXT: and a1, a1, t6 +; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a5 ; RV32IM-NEXT: slli a0, a0, 1 ; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -4354,80 +4362,81 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd s9, 408(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s10, 400(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 392(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a5, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a5, a0, 8 ; RV64IM-NEXT: li s4, 255 -; RV64IM-NEXT: srli a4, a0, 40 -; RV64IM-NEXT: lui s10, 16 -; RV64IM-NEXT: srli a7, a0, 56 +; RV64IM-NEXT: srli ra, a0, 40 +; RV64IM-NEXT: lui s11, 16 +; RV64IM-NEXT: srli t0, a0, 56 ; RV64IM-NEXT: srliw t2, a0, 24 -; RV64IM-NEXT: slli t3, a0, 56 -; RV64IM-NEXT: lui t4, 61681 -; RV64IM-NEXT: lui s6, 209715 -; RV64IM-NEXT: lui s5, 349525 +; RV64IM-NEXT: slli a6, a0, 56 +; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: lui t4, 209715 +; RV64IM-NEXT: lui s8, 349525 ; RV64IM-NEXT: srli s3, a1, 24 ; RV64IM-NEXT: srli t6, a1, 8 -; RV64IM-NEXT: srli ra, a1, 40 -; RV64IM-NEXT: srli t0, a1, 56 +; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srli t5, a1, 56 ; RV64IM-NEXT: srliw s7, a1, 24 -; RV64IM-NEXT: slli a3, a1, 56 +; RV64IM-NEXT: slli a4, a1, 56 ; RV64IM-NEXT: li t1, 1 ; RV64IM-NEXT: lui s1, 256 ; RV64IM-NEXT: lui s2, 4096 ; RV64IM-NEXT: lui s0, 8192 ; RV64IM-NEXT: lui s9, 4080 -; RV64IM-NEXT: and a2, a5, s9 -; RV64IM-NEXT: slli t5, s4, 24 -; RV64IM-NEXT: addi s11, s10, -256 -; RV64IM-NEXT: and a5, a6, t5 -; RV64IM-NEXT: sd t5, 384(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, a5, a2 -; RV64IM-NEXT: and a5, a0, s9 +; RV64IM-NEXT: and a2, a3, s9 +; RV64IM-NEXT: slli s5, s4, 24 +; RV64IM-NEXT: addi s10, s11, -256 +; RV64IM-NEXT: and a3, a5, s5 +; RV64IM-NEXT: sd s5, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a3, a0, s9 ; RV64IM-NEXT: slli t2, t2, 32 -; RV64IM-NEXT: addi s4, t4, -241 -; RV64IM-NEXT: addi s6, s6, 819 -; RV64IM-NEXT: addi s8, s5, 1365 -; RV64IM-NEXT: and a6, s3, s9 -; RV64IM-NEXT: and a4, a4, s11 -; RV64IM-NEXT: or a4, a4, a7 -; RV64IM-NEXT: and a7, a1, s9 +; RV64IM-NEXT: addi s4, t3, -241 +; RV64IM-NEXT: addi s6, t4, 819 +; RV64IM-NEXT: addi s8, s8, 1365 +; RV64IM-NEXT: and a5, s3, s9 +; RV64IM-NEXT: and t3, ra, s10 +; RV64IM-NEXT: or t0, t3, t0 +; RV64IM-NEXT: and t3, a1, s9 ; RV64IM-NEXT: slli t4, s7, 32 -; 
RV64IM-NEXT: slli a5, a5, 24 -; RV64IM-NEXT: or s5, a5, t2 -; RV64IM-NEXT: slli a5, s4, 32 -; RV64IM-NEXT: add s4, s4, a5 -; RV64IM-NEXT: slli a5, s6, 32 -; RV64IM-NEXT: add s6, s6, a5 -; RV64IM-NEXT: slli a5, s8, 32 -; RV64IM-NEXT: add s8, s8, a5 -; RV64IM-NEXT: slli s3, t1, 11 -; RV64IM-NEXT: and a5, t6, t5 -; RV64IM-NEXT: or a5, a5, a6 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: or s3, a3, t2 +; RV64IM-NEXT: slli a3, s4, 32 +; RV64IM-NEXT: add s4, s4, a3 +; RV64IM-NEXT: slli a3, s6, 32 +; RV64IM-NEXT: add s6, s6, a3 +; RV64IM-NEXT: slli a3, s8, 32 +; RV64IM-NEXT: add s8, s8, a3 +; RV64IM-NEXT: slli s7, t1, 11 +; RV64IM-NEXT: and a3, t6, s5 +; RV64IM-NEXT: or a3, a3, a5 ; RV64IM-NEXT: slli t2, t1, 32 -; RV64IM-NEXT: and a6, ra, s11 -; RV64IM-NEXT: or a6, a6, t0 +; RV64IM-NEXT: and a5, a7, s10 +; RV64IM-NEXT: or a5, a5, t5 ; RV64IM-NEXT: slli ra, t1, 33 -; RV64IM-NEXT: slli a7, a7, 24 -; RV64IM-NEXT: or a7, a7, t4 -; RV64IM-NEXT: slli s7, t1, 34 -; RV64IM-NEXT: or a2, a2, a4 -; RV64IM-NEXT: slli a4, t1, 35 -; RV64IM-NEXT: sd a4, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, s11 -; RV64IM-NEXT: sd s11, 352(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: or a0, t3, a0 -; RV64IM-NEXT: slli a4, t1, 36 -; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a4, a5, a6 -; RV64IM-NEXT: slli a6, t1, 37 -; RV64IM-NEXT: and a1, a1, s11 +; RV64IM-NEXT: slli t3, t3, 24 +; RV64IM-NEXT: or a7, t3, t4 +; RV64IM-NEXT: slli t3, t1, 34 +; RV64IM-NEXT: sd t3, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a2, t0 +; RV64IM-NEXT: slli t0, t1, 35 +; RV64IM-NEXT: sd t0, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: slli a0, a0, 40 +; RV64IM-NEXT: or a0, a6, a0 +; RV64IM-NEXT: slli a6, t1, 36 +; RV64IM-NEXT: or a3, a3, a5 +; RV64IM-NEXT: slli a5, t1, 37 +; RV64IM-NEXT: sd a5, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, a1, s10 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a3, a1 -; RV64IM-NEXT: or a0, a0, s5 +; RV64IM-NEXT: or a1, a4, a1 +; RV64IM-NEXT: or a0, a0, s3 ; RV64IM-NEXT: or a1, a1, a7 ; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: or a1, a1, a3 ; RV64IM-NEXT: srli a2, a0, 4 ; RV64IM-NEXT: sd s4, 376(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a0, a0, s4 @@ -4485,7 +4494,7 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: xor a1, a2, a1 ; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli s4, t1, 38 +; RV64IM-NEXT: slli s3, t1, 38 ; RV64IM-NEXT: lui a1, 2 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: lui a2, 4 @@ -4493,7 +4502,7 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 40 ; RV64IM-NEXT: lui a2, 128 ; RV64IM-NEXT: and a2, s5, a2 @@ -4501,42 +4510,42 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 41 ; RV64IM-NEXT: and a3, s5, s2 ; RV64IM-NEXT: and a4, s5, s0 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd 
a3, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a3, t1, 48 ; RV64IM-NEXT: and a4, s5, t2 ; RV64IM-NEXT: and a5, s5, ra ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a5, a0, a5 ; RV64IM-NEXT: xor a4, a4, a5 -; RV64IM-NEXT: sd a4, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, t1, 49 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: and a2, s5, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 56 ; RV64IM-NEXT: and a2, s5, a3 ; RV64IM-NEXT: and a3, s5, a4 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 57 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: and a2, s5, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 39 ; RV64IM-NEXT: slli ra, t1, 42 ; RV64IM-NEXT: slli a4, t1, 43 @@ -4544,9 +4553,8 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: slli s0, t1, 45 ; RV64IM-NEXT: slli s1, t1, 46 ; RV64IM-NEXT: slli s2, t1, 47 -; RV64IM-NEXT: slli s6, t1, 50 -; RV64IM-NEXT: slli a1, t1, 51 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s4, t1, 50 +; RV64IM-NEXT: slli s6, t1, 51 ; RV64IM-NEXT: slli a1, t1, 52 ; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 53 @@ -4565,14 +4573,14 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli t1, t1, 62 ; RV64IM-NEXT: sd t1, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and t1, s5, s3 +; RV64IM-NEXT: and t1, s5, s7 ; RV64IM-NEXT: lui a3, 1 ; RV64IM-NEXT: and a1, s5, a3 ; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui a3, 8 ; RV64IM-NEXT: and a1, s5, a3 ; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s10 +; RV64IM-NEXT: and a1, s5, s11 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui s8, 32 ; RV64IM-NEXT: and a1, s5, s8 @@ -4601,34 +4609,34 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: and a7, s5, t5 ; RV64IM-NEXT: lui t6, 262144 ; RV64IM-NEXT: and t6, s5, t6 -; RV64IM-NEXT: and s11, s5, s7 ; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, s5, a1 +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a6 ; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, a6 +; RV64IM-NEXT: and a1, s5, s3 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s4 -; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s5, a2 -; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and ra, s5, ra ; RV64IM-NEXT: and a1, s5, a4 -; 
RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s5, a5 -; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s5, s0 -; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s5, s1 -; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s5, s2 +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s4 ; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s6 -; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s0, s5, a1 +; RV64IM-NEXT: and s0, s5, s6 ; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and s1, s5, a1 ; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload @@ -4654,13 +4662,13 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: srli s5, s5, 63 ; RV64IM-NEXT: mul t4, a0, a1 ; RV64IM-NEXT: mul a1, a0, a2 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul t2, a0, a3 ; RV64IM-NEXT: mul a1, a0, t1 ; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t0, a0, a1 ; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload @@ -4668,17 +4676,17 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a6, a0, a1 ; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t5, a0, a1 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a5, a0, a1 ; RV64IM-NEXT: mul t3, a0, t3 @@ -4686,45 +4694,44 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a7 -; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, t6 -; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, a4, 31 ; RV64IM-NEXT: mul a2, a0, s11 -; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a7, a0, a1 -; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul s11, a0, a1 -; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 72(sp) # 
8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul ra, a0, ra -; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a3, a0, a1 -; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t1, a0, a1 -; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t6, a0, a1 -; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul s0, a0, s0 ; RV64IM-NEXT: mul s1, a0, s1 ; RV64IM-NEXT: mul s2, a0, s2 ; RV64IM-NEXT: mul s3, a0, s3 ; RV64IM-NEXT: mul s4, a0, s4 -; RV64IM-NEXT: sd s4, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s6, a0, s6 ; RV64IM-NEXT: mul s7, a0, s7 ; RV64IM-NEXT: mul s8, a0, s8 @@ -4733,105 +4740,108 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: slli s5, s5, 63 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a0, a0, s5 +; RV64IM-NEXT: sd a0, 312(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld s5, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, s5, s4 -; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, s4, t4 -; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t2, s4, t2 -; RV64IM-NEXT: ld s4, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, s4, t0 -; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, s4, a6 -; RV64IM-NEXT: ld s4, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, s4, a5 -; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, s4, a2 -; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor ra, s4, ra -; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, s4, a1 -; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s6, s4, s6 +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, s5, a0 +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, a0, t2 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, a0, t0 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a0, a6 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a0, a5 +; RV64IM-NEXT: ld a0, 264(sp) # 
8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor ra, a0, ra +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, a0, s6 ; RV64IM-NEXT: xor t4, s5, t4 -; RV64IM-NEXT: ld s4, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t2, t2, s4 -; RV64IM-NEXT: ld s4, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t0, s4 +; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, a0 +; RV64IM-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, a0 ; RV64IM-NEXT: xor a6, a6, t5 ; RV64IM-NEXT: xor a5, a5, t3 ; RV64IM-NEXT: xor a2, a2, a7 ; RV64IM-NEXT: xor a3, ra, a3 ; RV64IM-NEXT: xor a1, a1, s0 ; RV64IM-NEXT: xor a7, s6, s7 -; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t4, t3 -; RV64IM-NEXT: ld t4, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t2, t2, t4 -; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t0, t4 -; RV64IM-NEXT: ld t4, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, t4 -; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, t4 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t4, a0 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 ; RV64IM-NEXT: xor a2, a2, s11 ; RV64IM-NEXT: xor a3, a3, t1 ; RV64IM-NEXT: xor a1, a1, s1 ; RV64IM-NEXT: xor a7, a7, s8 -; RV64IM-NEXT: ld t1, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t0, t1 -; RV64IM-NEXT: ld t1, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, t1 -; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 ; RV64IM-NEXT: xor a3, a3, t6 ; RV64IM-NEXT: xor a1, a1, s2 ; RV64IM-NEXT: xor a7, a7, s9 ; RV64IM-NEXT: xor t1, t3, t2 ; RV64IM-NEXT: xor t0, t1, t0 -; RV64IM-NEXT: ld t1, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, t1 -; RV64IM-NEXT: ld t1, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t1 -; RV64IM-NEXT: ld t1, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t1 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 ; RV64IM-NEXT: xor a1, a1, s3 ; RV64IM-NEXT: xor a7, a7, s10 ; RV64IM-NEXT: xor a6, t0, a6 ; RV64IM-NEXT: xor a4, a5, a4 -; RV64IM-NEXT: ld a5, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a5 -; RV64IM-NEXT: ld a5, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a5 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a5, %hi(.LCPI8_0) +; RV64IM-NEXT: ld a5, 
%lo(.LCPI8_0)(a5) ; RV64IM-NEXT: slli t3, t3, 56 -; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, a5 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a0, a7, a0 -; RV64IM-NEXT: ld t0, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a5, a6, t0 +; RV64IM-NEXT: ld t1, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a6, t1 ; RV64IM-NEXT: xor a4, a6, a4 -; RV64IM-NEXT: slli a5, a5, 40 +; RV64IM-NEXT: slli a7, a7, 40 ; RV64IM-NEXT: xor a2, a4, a2 -; RV64IM-NEXT: or a4, t3, a5 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a5, a2, a7 +; RV64IM-NEXT: or a4, t3, a7 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a6, a2, t0 ; RV64IM-NEXT: xor a3, a2, a3 ; RV64IM-NEXT: srli a2, a2, 8 -; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: slli a6, a6, 24 ; RV64IM-NEXT: xor a1, a3, a1 -; RV64IM-NEXT: ld a6, 384(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a2, a2, a6 +; RV64IM-NEXT: ld a7, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a2, a2, a7 ; RV64IM-NEXT: srli a3, a3, 24 -; RV64IM-NEXT: srliw a6, a1, 24 -; RV64IM-NEXT: and a3, a3, a7 -; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srliw a7, a1, 24 +; RV64IM-NEXT: and a3, a3, t0 +; RV64IM-NEXT: srli t0, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: slli a6, a6, 32 +; RV64IM-NEXT: slli a7, a7, 32 ; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: and a1, a7, t0 +; RV64IM-NEXT: and a1, t0, t1 ; RV64IM-NEXT: srli a0, a0, 56 -; RV64IM-NEXT: or a3, a5, a6 +; RV64IM-NEXT: or a3, a6, a7 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: or a3, a4, a3 ; RV64IM-NEXT: or a0, a2, a0 @@ -4851,9 +4861,10 @@ define i4 @clmulr_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: srli a1, a0, 1 ; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: and a1, a1, a5 ; RV64IM-NEXT: slli a0, a0, 1 ; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: srli a0, a0, 1 ; RV64IM-NEXT: ld ra, 488(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 480(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 472(sp) # 8-byte Folded Reload @@ -5136,8 +5147,9 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV32IM-NEXT: andi a1, a0, 85 ; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: slli a1, a1, 1 -; RV32IM-NEXT: andi a0, a0, 85 +; RV32IM-NEXT: andi a0, a0, 340 ; RV32IM-NEXT: or a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -5170,83 +5182,82 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a4, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: srli a3, a0, 24 +; RV64IM-NEXT: srli a7, a0, 8 ; RV64IM-NEXT: li s4, 255 -; RV64IM-NEXT: srli a5, a0, 40 -; RV64IM-NEXT: lui s8, 16 +; RV64IM-NEXT: srli a4, a0, 40 +; RV64IM-NEXT: lui s10, 16 ; RV64IM-NEXT: srli t1, a0, 56 -; RV64IM-NEXT: srliw t3, a0, 24 -; RV64IM-NEXT: slli t4, a0, 56 +; RV64IM-NEXT: srliw t4, a0, 24 +; RV64IM-NEXT: slli a5, a0, 56 ; RV64IM-NEXT: lui s3, 61681 ; RV64IM-NEXT: lui t5, 209715 ; RV64IM-NEXT: lui s6, 349525 ; RV64IM-NEXT: srli s9, a1, 24 ; RV64IM-NEXT: srli s0, a1, 8 -; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srli ra, a1, 40 ; RV64IM-NEXT: srli t2, a1, 56 ; RV64IM-NEXT: srliw s11, a1, 24 -; RV64IM-NEXT: slli a3, a1, 56 +; RV64IM-NEXT: slli a6, a1, 56 ; 
RV64IM-NEXT: li t0, 1 ; RV64IM-NEXT: lui s1, 128 ; RV64IM-NEXT: lui s2, 256 ; RV64IM-NEXT: lui t6, 4096 ; RV64IM-NEXT: lui s5, 8192 ; RV64IM-NEXT: lui s7, 4080 -; RV64IM-NEXT: and a2, a4, s7 -; RV64IM-NEXT: slli ra, s4, 24 -; RV64IM-NEXT: addi s10, s8, -256 -; RV64IM-NEXT: and a4, a6, ra -; RV64IM-NEXT: sd ra, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, a4, a2 -; RV64IM-NEXT: and a4, a0, s7 -; RV64IM-NEXT: slli t3, t3, 32 +; RV64IM-NEXT: and a2, a3, s7 +; RV64IM-NEXT: slli t3, s4, 24 +; RV64IM-NEXT: addi s8, s10, -256 +; RV64IM-NEXT: and a3, a7, t3 +; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a3, a2 +; RV64IM-NEXT: and a3, a0, s7 +; RV64IM-NEXT: slli t4, t4, 32 ; RV64IM-NEXT: addi s3, s3, -241 ; RV64IM-NEXT: addi s4, t5, 819 ; RV64IM-NEXT: addi s6, s6, 1365 -; RV64IM-NEXT: and a6, s9, s7 -; RV64IM-NEXT: and a5, a5, s10 -; RV64IM-NEXT: or a5, a5, t1 +; RV64IM-NEXT: and a7, s9, s7 +; RV64IM-NEXT: and a4, a4, s8 +; RV64IM-NEXT: or a4, a4, t1 ; RV64IM-NEXT: and t1, a1, s7 ; RV64IM-NEXT: slli t5, s11, 32 -; RV64IM-NEXT: slli a4, a4, 24 -; RV64IM-NEXT: or s9, a4, t3 -; RV64IM-NEXT: slli a4, s3, 32 -; RV64IM-NEXT: add s3, s3, a4 -; RV64IM-NEXT: slli a4, s4, 32 -; RV64IM-NEXT: add s4, s4, a4 -; RV64IM-NEXT: slli a4, s6, 32 -; RV64IM-NEXT: add s6, s6, a4 -; RV64IM-NEXT: slli t3, t0, 11 -; RV64IM-NEXT: and a4, s0, ra -; RV64IM-NEXT: or a4, a4, a6 +; RV64IM-NEXT: slli a3, a3, 24 +; RV64IM-NEXT: or s9, a3, t4 +; RV64IM-NEXT: slli a3, s3, 32 +; RV64IM-NEXT: add s3, s3, a3 +; RV64IM-NEXT: slli a3, s4, 32 +; RV64IM-NEXT: add s4, s4, a3 +; RV64IM-NEXT: slli a3, s6, 32 +; RV64IM-NEXT: add s6, s6, a3 +; RV64IM-NEXT: slli t4, t0, 11 +; RV64IM-NEXT: and a3, s0, t3 +; RV64IM-NEXT: or a3, a3, a7 ; RV64IM-NEXT: slli s11, t0, 32 -; RV64IM-NEXT: and a6, a7, s10 -; RV64IM-NEXT: or a6, a6, t2 +; RV64IM-NEXT: and a7, ra, s8 +; RV64IM-NEXT: or a7, a7, t2 ; RV64IM-NEXT: slli ra, t0, 33 ; RV64IM-NEXT: slli t1, t1, 24 -; RV64IM-NEXT: or a7, t1, t5 +; RV64IM-NEXT: or t1, t1, t5 ; RV64IM-NEXT: slli s0, t0, 34 -; RV64IM-NEXT: or a2, a2, a5 -; RV64IM-NEXT: slli a5, t0, 35 -; RV64IM-NEXT: sd a5, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s10, 344(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: or a2, a2, a4 +; RV64IM-NEXT: slli a4, t0, 35 +; RV64IM-NEXT: sd a4, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s8 +; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: or a0, t4, a0 -; RV64IM-NEXT: slli a5, t0, 36 -; RV64IM-NEXT: sd a5, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a4, a4, a6 -; RV64IM-NEXT: slli a6, t0, 37 -; RV64IM-NEXT: and a1, a1, s10 +; RV64IM-NEXT: or a0, a5, a0 +; RV64IM-NEXT: slli a4, t0, 36 +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli a7, t0, 37 +; RV64IM-NEXT: and a1, a1, s8 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a3, a1 -; RV64IM-NEXT: slli a3, t0, 38 -; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a1, a6, a1 +; RV64IM-NEXT: slli a6, t0, 38 ; RV64IM-NEXT: or a0, a0, s9 -; RV64IM-NEXT: or a1, a1, a7 +; RV64IM-NEXT: or a1, a1, t1 ; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a4 +; RV64IM-NEXT: or a1, a1, a3 ; RV64IM-NEXT: srli a2, a0, 4 ; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a0, a0, s3 @@ -5303,7 +5314,7 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: xor a1, a2, a1 ; RV64IM-NEXT: sd a1, 312(sp) 
# 8-byte Folded Spill -; RV64IM-NEXT: slli a7, t0, 39 +; RV64IM-NEXT: slli s3, t0, 39 ; RV64IM-NEXT: lui a1, 2 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: lui a2, 4 @@ -5318,51 +5329,52 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 41 ; RV64IM-NEXT: and a3, s6, t6 ; RV64IM-NEXT: and a4, s6, s5 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a3, t0, 48 ; RV64IM-NEXT: and a4, s6, s11 ; RV64IM-NEXT: and a5, s6, ra ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a5, a0, a5 ; RV64IM-NEXT: xor a4, a4, a5 -; RV64IM-NEXT: sd a4, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, t0, 49 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: and a2, s6, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 56 ; RV64IM-NEXT: and a2, s6, a3 ; RV64IM-NEXT: and a3, s6, a4 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 57 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: and a2, s6, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 42 ; RV64IM-NEXT: slli ra, t0, 43 ; RV64IM-NEXT: slli a4, t0, 44 ; RV64IM-NEXT: slli t6, t0, 45 ; RV64IM-NEXT: slli s1, t0, 46 ; RV64IM-NEXT: slli s2, t0, 47 -; RV64IM-NEXT: slli s3, t0, 50 -; RV64IM-NEXT: slli s4, t0, 51 -; RV64IM-NEXT: slli s5, t0, 52 +; RV64IM-NEXT: slli s4, t0, 50 +; RV64IM-NEXT: slli s5, t0, 51 +; RV64IM-NEXT: slli a1, t0, 52 +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 53 ; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 54 @@ -5379,7 +5391,7 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli t0, t0, 62 ; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, t3 +; RV64IM-NEXT: and a1, s6, t4 ; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui a3, 1 ; RV64IM-NEXT: and a1, s6, a3 @@ -5387,7 +5399,7 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: lui a3, 8 ; RV64IM-NEXT: and a1, s6, a3 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s8 +; RV64IM-NEXT: and a1, s6, s10 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui s9, 32 ; RV64IM-NEXT: and a1, s6, s9 @@ -5419,16 +5431,15 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: and s11, s6, s0 ; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill 
+; RV64IM-NEXT: and a1, s6, a7 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a6 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: and a1, s6, s3 ; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a2 ; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill @@ -5441,10 +5452,11 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, s2 ; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: and a1, s6, s4 ; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and s0, s6, s4 -; RV64IM-NEXT: and s1, s6, s5 +; RV64IM-NEXT: and s0, s6, s5 +; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s1, s6, a1 ; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and s2, s6, a1 ; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload @@ -5468,14 +5480,14 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: srli s6, s6, 63 ; RV64IM-NEXT: mul t4, a0, a1 ; RV64IM-NEXT: mul a1, a0, a2 -; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul t3, a0, a3 ; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t1, a0, a1 ; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload @@ -5483,17 +5495,17 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a7, a0, a1 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t6, a0, a1 ; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a6, a0, a1 ; RV64IM-NEXT: mul t5, a0, t2 @@ -5501,25 +5513,25 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a5 -; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, t0 -; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, a4, 31 ; RV64IM-NEXT: mul a3, a0, s11 -; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t0, a0, a1 -; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 72(sp) # 
8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a2, a0, a1 ; RV64IM-NEXT: mul a5, a0, ra @@ -5531,7 +5543,7 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul ra, a0, a1 ; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul s0, a0, s0 @@ -5539,7 +5551,6 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul s2, a0, s2 ; RV64IM-NEXT: mul s3, a0, s3 ; RV64IM-NEXT: mul s4, a0, s4 -; RV64IM-NEXT: sd s4, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s5, a0, s5 ; RV64IM-NEXT: mul s7, a0, s7 ; RV64IM-NEXT: mul s8, a0, s8 @@ -5548,80 +5559,83 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: slli s6, s6, 63 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a0, a0, s6 +; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s6, s6, s4 -; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, s4, t4 -; RV64IM-NEXT: ld s4, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, s4, t3 -; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, s4, t1 -; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, s4, a7 -; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, s4, a6 -; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, s4, a3 -; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, s4, a2 -; RV64IM-NEXT: ld s4, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, s4, a1 -; RV64IM-NEXT: ld s4, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, s4, s5 +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, a0 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, a0, t3 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, a0, t1 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a0, a7 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a0, a6 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a0, a3 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, a0, s5 ; RV64IM-NEXT: xor t4, s6, t4 -; RV64IM-NEXT: ld s4, 
136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, s4 -; RV64IM-NEXT: ld s4, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, s4 +; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 ; RV64IM-NEXT: xor a7, a7, t6 ; RV64IM-NEXT: xor a6, a6, t5 ; RV64IM-NEXT: xor a3, a3, t0 ; RV64IM-NEXT: xor a2, a2, a5 ; RV64IM-NEXT: xor a1, a1, s0 ; RV64IM-NEXT: xor a5, s5, s7 -; RV64IM-NEXT: ld t0, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t4, t0 -; RV64IM-NEXT: ld t4, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, t4 -; RV64IM-NEXT: ld t4, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, t4 -; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, t4 -; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, t4 -; RV64IM-NEXT: ld t4, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t4 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 ; RV64IM-NEXT: xor a2, a2, t2 ; RV64IM-NEXT: xor a1, a1, s1 ; RV64IM-NEXT: xor a5, a5, s8 -; RV64IM-NEXT: ld t2, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, t2 -; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, t2 -; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t2 -; RV64IM-NEXT: xor a2, a2, s11 -; RV64IM-NEXT: xor a1, a1, s2 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: xor a2, a2, s11 +; RV64IM-NEXT: xor a1, a1, s2 ; RV64IM-NEXT: xor a5, a5, s9 ; RV64IM-NEXT: xor t2, t0, t3 ; RV64IM-NEXT: xor t1, t2, t1 -; RV64IM-NEXT: ld t2, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, t2 -; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, t2 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 ; RV64IM-NEXT: xor a2, a2, ra ; RV64IM-NEXT: xor a1, a1, s3 ; RV64IM-NEXT: xor a5, a5, s10 ; RV64IM-NEXT: xor a7, t1, a7 ; RV64IM-NEXT: xor a4, a6, a4 -; RV64IM-NEXT: ld a6, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a6 -; RV64IM-NEXT: ld a6, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a6 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a6, %hi(.LCPI9_0) +; RV64IM-NEXT: ld a6, %lo(.LCPI9_0)(a6) ; RV64IM-NEXT: slli t0, t0, 56 -; RV64IM-NEXT: ld a6, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a1, a6 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a0, a5, a0 ; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a5, a7, 
t1 @@ -5629,24 +5643,24 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: slli a5, a5, 40 ; RV64IM-NEXT: xor a3, a4, a3 ; RV64IM-NEXT: or a4, t0, a5 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a5, a3, a7 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a5, a3, t0 ; RV64IM-NEXT: xor a2, a3, a2 ; RV64IM-NEXT: srli a3, a3, 8 ; RV64IM-NEXT: slli a5, a5, 24 ; RV64IM-NEXT: xor a1, a2, a1 -; RV64IM-NEXT: ld a6, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a3, a3, a6 +; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a7 ; RV64IM-NEXT: srli a2, a2, 24 -; RV64IM-NEXT: srliw a6, a1, 24 -; RV64IM-NEXT: and a2, a2, a7 -; RV64IM-NEXT: srli a7, a1, 40 +; RV64IM-NEXT: srliw a7, a1, 24 +; RV64IM-NEXT: and a2, a2, t0 +; RV64IM-NEXT: srli t0, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: slli a6, a6, 32 +; RV64IM-NEXT: slli a7, a7, 32 ; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a1, a7, t1 +; RV64IM-NEXT: and a1, t0, t1 ; RV64IM-NEXT: srli a0, a0, 56 -; RV64IM-NEXT: or a3, a5, a6 +; RV64IM-NEXT: or a3, a5, a7 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: or a3, a4, a3 ; RV64IM-NEXT: or a0, a2, a0 @@ -5663,11 +5677,13 @@ define i8 @clmulr_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: slli a0, a0, 2 ; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: andi a1, a0, 85 -; RV64IM-NEXT: srli a0, a0, 1 -; RV64IM-NEXT: slli a1, a1, 1 +; RV64IM-NEXT: srli a1, a0, 1 ; RV64IM-NEXT: andi a0, a0, 85 -; RV64IM-NEXT: or a0, a0, a1 +; RV64IM-NEXT: and a1, a1, a6 +; RV64IM-NEXT: slli a0, a0, 1 +; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 55 +; RV64IM-NEXT: srli a0, a0, 56 ; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload @@ -5712,8 +5728,8 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: lui ra, 16 ; RV32IM-NEXT: srli t1, a0, 24 ; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui s6, 61681 -; RV32IM-NEXT: lui t3, 209715 +; RV32IM-NEXT: lui s10, 61681 +; RV32IM-NEXT: lui t2, 209715 ; RV32IM-NEXT: lui a4, 349525 ; RV32IM-NEXT: srli t4, a1, 8 ; RV32IM-NEXT: srli t5, a1, 24 @@ -5721,55 +5737,55 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: li t6, 1 ; RV32IM-NEXT: lui a7, 2 ; RV32IM-NEXT: lui a6, 4 -; RV32IM-NEXT: lui t2, 8 -; RV32IM-NEXT: lui s1, 32 -; RV32IM-NEXT: lui s0, 64 -; RV32IM-NEXT: lui s3, 128 -; RV32IM-NEXT: lui s4, 256 -; RV32IM-NEXT: lui s5, 512 -; RV32IM-NEXT: lui s8, 1024 +; RV32IM-NEXT: lui s2, 8 +; RV32IM-NEXT: lui s0, 32 +; RV32IM-NEXT: lui s1, 64 +; RV32IM-NEXT: lui t3, 128 +; RV32IM-NEXT: lui s3, 256 +; RV32IM-NEXT: lui s4, 512 +; RV32IM-NEXT: lui s6, 1024 ; RV32IM-NEXT: lui s7, 2048 -; RV32IM-NEXT: lui s9, 4096 -; RV32IM-NEXT: lui s10, 8192 +; RV32IM-NEXT: lui s8, 4096 +; RV32IM-NEXT: lui s9, 8192 ; RV32IM-NEXT: lui s11, 16384 -; RV32IM-NEXT: addi s2, ra, -256 -; RV32IM-NEXT: sw s2, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t0, t0, s2 +; RV32IM-NEXT: addi s5, ra, -256 +; RV32IM-NEXT: sw s5, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t0, t0, s5 ; RV32IM-NEXT: or t1, t0, t1 ; RV32IM-NEXT: lui a3, 32768 -; RV32IM-NEXT: and t4, t4, s2 +; RV32IM-NEXT: and t4, t4, s5 ; RV32IM-NEXT: or t5, t4, t5 ; RV32IM-NEXT: lui t0, 65536 -; RV32IM-NEXT: and a0, a0, s2 +; RV32IM-NEXT: and a0, a0, s5 ; RV32IM-NEXT: slli a0, a0, 8 ; RV32IM-NEXT: or a2, a2, a0 ; RV32IM-NEXT: lui t4, 131072 -; RV32IM-NEXT: and a1, a1, s2 +; RV32IM-NEXT: and a1, a1, s5 ; RV32IM-NEXT: slli a1, 
a1, 8 ; RV32IM-NEXT: or a0, a5, a1 ; RV32IM-NEXT: lui a5, 262144 -; RV32IM-NEXT: addi s2, s6, -241 -; RV32IM-NEXT: addi s6, t3, 819 +; RV32IM-NEXT: addi s5, s10, -241 +; RV32IM-NEXT: addi s10, t2, 819 ; RV32IM-NEXT: addi a4, a4, 1365 ; RV32IM-NEXT: or a2, a2, t1 ; RV32IM-NEXT: or a0, a0, t5 ; RV32IM-NEXT: srli t1, a2, 4 -; RV32IM-NEXT: and a2, a2, s2 +; RV32IM-NEXT: and a2, a2, s5 ; RV32IM-NEXT: srli t5, a0, 4 -; RV32IM-NEXT: and a0, a0, s2 -; RV32IM-NEXT: and t1, t1, s2 +; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: and t1, t1, s5 ; RV32IM-NEXT: slli a2, a2, 4 -; RV32IM-NEXT: and t5, t5, s2 +; RV32IM-NEXT: and t5, t5, s5 ; RV32IM-NEXT: slli a0, a0, 4 ; RV32IM-NEXT: or a2, t1, a2 ; RV32IM-NEXT: or a0, t5, a0 ; RV32IM-NEXT: srli t1, a2, 2 -; RV32IM-NEXT: and a2, a2, s6 +; RV32IM-NEXT: and a2, a2, s10 ; RV32IM-NEXT: srli t5, a0, 2 -; RV32IM-NEXT: and a0, a0, s6 -; RV32IM-NEXT: and t1, t1, s6 +; RV32IM-NEXT: and a0, a0, s10 +; RV32IM-NEXT: and t1, t1, s10 ; RV32IM-NEXT: slli a2, a2, 2 -; RV32IM-NEXT: and t5, t5, s6 +; RV32IM-NEXT: and t5, t5, s10 ; RV32IM-NEXT: slli a0, a0, 2 ; RV32IM-NEXT: or a2, t1, a2 ; RV32IM-NEXT: or a0, t5, a0 @@ -5785,7 +5801,7 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: slli a0, a0, 1 ; RV32IM-NEXT: or a4, t1, a2 ; RV32IM-NEXT: or a0, t5, a0 -; RV32IM-NEXT: andi t3, a0, 2 +; RV32IM-NEXT: andi t2, a0, 2 ; RV32IM-NEXT: andi t5, a0, 1 ; RV32IM-NEXT: and t6, a0, t6 ; RV32IM-NEXT: lui a2, 1 @@ -5795,36 +5811,37 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: sw a2, 76(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a2, a0, a6 ; RV32IM-NEXT: sw a2, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t2, a0, t2 +; RV32IM-NEXT: and a2, a0, s2 +; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and ra, a0, ra -; RV32IM-NEXT: and s1, a0, s1 -; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and s0, a0, s0 -; RV32IM-NEXT: and s3, a0, s3 -; RV32IM-NEXT: and a2, a0, s4 -; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s5 +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: sw s1, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, t3 ; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s8 +; RV32IM-NEXT: and a2, a0, s3 ; RV32IM-NEXT: sw a2, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s7 +; RV32IM-NEXT: and s4, a0, s4 +; RV32IM-NEXT: and a2, a0, s6 ; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s9 +; RV32IM-NEXT: and a2, a0, s7 ; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s10 +; RV32IM-NEXT: and a2, a0, s8 ; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s11 +; RV32IM-NEXT: and a2, a0, s9 ; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s11 +; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a3, a0, a3 -; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a2, a0, t0 -; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, t4 ; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, t4 +; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a5, a0, a5 -; RV32IM-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a1, a0, a1 -; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded 
Spill ; RV32IM-NEXT: andi a1, a0, 4 ; RV32IM-NEXT: andi a2, a0, 8 ; RV32IM-NEXT: andi a3, a0, 16 @@ -5834,126 +5851,130 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: andi t0, a0, 256 ; RV32IM-NEXT: andi t1, a0, 512 ; RV32IM-NEXT: andi a0, a0, 1024 -; RV32IM-NEXT: mul s11, a4, t3 -; RV32IM-NEXT: mul s9, a4, t5 +; RV32IM-NEXT: mul s11, a4, t2 +; RV32IM-NEXT: mul s7, a4, t5 ; RV32IM-NEXT: mul s8, a4, a1 -; RV32IM-NEXT: mul s4, a4, a2 -; RV32IM-NEXT: mul s5, a4, a3 +; RV32IM-NEXT: mul s3, a4, a2 +; RV32IM-NEXT: mul s2, a4, a3 ; RV32IM-NEXT: mul s1, a4, a5 ; RV32IM-NEXT: mul a1, a4, a6 -; RV32IM-NEXT: sw a1, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill ; RV32IM-NEXT: mul a1, a4, a7 ; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill ; RV32IM-NEXT: mul t5, a4, t0 ; RV32IM-NEXT: mul t3, a4, t1 -; RV32IM-NEXT: mul s10, a4, a0 +; RV32IM-NEXT: mul s9, a4, a0 ; RV32IM-NEXT: mul a0, a4, t6 -; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul t1, a4, a0 ; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a7, a4, a0 -; RV32IM-NEXT: mul t6, a4, t2 -; RV32IM-NEXT: mul s7, a4, ra ; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t6, a4, a0 +; RV32IM-NEXT: mul s6, a4, ra ; RV32IM-NEXT: mul a0, a4, s0 -; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a3, a4, s3 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a4, a0 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a6, a4, a0 +; RV32IM-NEXT: mul a3, a4, a0 ; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t2, a4, a0 +; RV32IM-NEXT: mul a2, a4, a0 +; RV32IM-NEXT: mul a6, a4, s4 ; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s3, a4, a0 +; RV32IM-NEXT: mul t2, a4, a0 ; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a4, a0 +; RV32IM-NEXT: mul s4, a4, a0 ; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a1, a4, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a5, 36(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a5, a4, a5 -; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul t0, a4, t0 -; RV32IM-NEXT: lw t4, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul t4, a4, t4 -; RV32IM-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul s0, a4, s0 -; RV32IM-NEXT: lw ra, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul ra, a4, ra ; RV32IM-NEXT: sw ra, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a4, a4, ra -; RV32IM-NEXT: xor s9, s9, s11 -; 
RV32IM-NEXT: xor s4, s8, s4 -; RV32IM-NEXT: xor s1, s5, s1 +; RV32IM-NEXT: xor s7, s7, s11 +; RV32IM-NEXT: xor s3, s8, s3 +; RV32IM-NEXT: xor s1, s2, s1 ; RV32IM-NEXT: xor t3, t5, t3 ; RV32IM-NEXT: xor a7, t1, a7 ; RV32IM-NEXT: xor a2, a3, a2 ; RV32IM-NEXT: xor a0, a1, a0 -; RV32IM-NEXT: xor a1, s9, s4 -; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, s7, s3 +; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, s1, a3 -; RV32IM-NEXT: xor t1, t3, s10 +; RV32IM-NEXT: xor t1, t3, s9 ; RV32IM-NEXT: xor a7, a7, t6 ; RV32IM-NEXT: xor a2, a2, a6 ; RV32IM-NEXT: xor a0, a0, a5 ; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, t1, a3 -; RV32IM-NEXT: xor a5, a7, s7 +; RV32IM-NEXT: xor a5, a7, s6 ; RV32IM-NEXT: xor a2, a2, t2 ; RV32IM-NEXT: xor a0, a0, t0 ; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a1, a1, a6 -; RV32IM-NEXT: lw a6, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a6, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, a3, a6 ; RV32IM-NEXT: lw a6, 72(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a2, a2, s3 +; RV32IM-NEXT: xor a2, a2, s4 ; RV32IM-NEXT: xor a0, a0, t4 ; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a5, a5, a6 ; RV32IM-NEXT: xor a0, a0, s0 ; RV32IM-NEXT: xor a3, a1, a3 ; RV32IM-NEXT: xor a3, a3, a5 -; RV32IM-NEXT: lui a5, 5 -; RV32IM-NEXT: addi a5, a5, 1365 -; RV32IM-NEXT: slli a1, a1, 24 +; RV32IM-NEXT: lui a5, 21 ; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lui a6, 5 +; RV32IM-NEXT: addi a5, a5, 1364 +; RV32IM-NEXT: addi a6, a6, 1365 +; RV32IM-NEXT: slli a1, a1, 24 ; RV32IM-NEXT: xor a2, a3, a2 ; RV32IM-NEXT: xor a0, a0, a4 -; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a3, a2, a6 +; RV32IM-NEXT: lw a7, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a3, a2, a7 ; RV32IM-NEXT: srli a4, a2, 8 ; RV32IM-NEXT: xor a0, a2, a0 ; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, a6 +; RV32IM-NEXT: and a2, a4, a7 ; RV32IM-NEXT: srli a0, a0, 24 ; RV32IM-NEXT: or a1, a1, a3 ; RV32IM-NEXT: or a0, a2, a0 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: and a0, a0, s2 -; RV32IM-NEXT: and a1, a1, s2 +; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: and a1, a1, s5 ; RV32IM-NEXT: slli a0, a0, 4 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: and a0, a0, s6 -; RV32IM-NEXT: and a1, a1, s6 +; RV32IM-NEXT: and a0, a0, s10 +; RV32IM-NEXT: and a1, a1, s10 ; RV32IM-NEXT: slli a0, a0, 2 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: and a0, a0, a5 +; RV32IM-NEXT: and a0, a0, a6 ; RV32IM-NEXT: and a1, a1, a5 ; RV32IM-NEXT: slli a0, a0, 1 ; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -5987,14 +6008,14 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill ; RV64IM-NEXT: srli a3, a0, 24 -; RV64IM-NEXT: srli a7, a0, 8 +; RV64IM-NEXT: srli a6, a0, 8 ; RV64IM-NEXT: li s4, 255 ; RV64IM-NEXT: srli a4, a0, 40 -; RV64IM-NEXT: lui s10, 16 +; RV64IM-NEXT: lui s3, 16 ; RV64IM-NEXT: srli t1, a0, 56 ; RV64IM-NEXT: srliw t4, a0, 24 -; 
RV64IM-NEXT: slli a5, a0, 56 -; RV64IM-NEXT: lui s3, 61681 +; RV64IM-NEXT: slli a7, a0, 56 +; RV64IM-NEXT: lui t3, 61681 ; RV64IM-NEXT: lui t5, 209715 ; RV64IM-NEXT: lui s6, 349525 ; RV64IM-NEXT: srli s9, a1, 24 @@ -6002,7 +6023,7 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: srli ra, a1, 40 ; RV64IM-NEXT: srli t2, a1, 56 ; RV64IM-NEXT: srliw s11, a1, 24 -; RV64IM-NEXT: slli a6, a1, 56 +; RV64IM-NEXT: slli a5, a1, 56 ; RV64IM-NEXT: li t0, 1 ; RV64IM-NEXT: lui s1, 128 ; RV64IM-NEXT: lui s2, 256 @@ -6010,21 +6031,21 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: lui s5, 8192 ; RV64IM-NEXT: lui s7, 4080 ; RV64IM-NEXT: and a2, a3, s7 -; RV64IM-NEXT: slli t3, s4, 24 -; RV64IM-NEXT: addi s8, s10, -256 -; RV64IM-NEXT: and a3, a7, t3 -; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s10, s4, 24 +; RV64IM-NEXT: addi s8, s3, -256 +; RV64IM-NEXT: and a3, a6, s10 +; RV64IM-NEXT: sd s10, 368(sp) # 8-byte Folded Spill ; RV64IM-NEXT: or a2, a3, a2 ; RV64IM-NEXT: and a3, a0, s7 ; RV64IM-NEXT: slli t4, t4, 32 -; RV64IM-NEXT: addi s3, s3, -241 +; RV64IM-NEXT: addi s3, t3, -241 ; RV64IM-NEXT: addi s4, t5, 819 ; RV64IM-NEXT: addi s6, s6, 1365 -; RV64IM-NEXT: and a7, s9, s7 +; RV64IM-NEXT: and a6, s9, s7 ; RV64IM-NEXT: and a4, a4, s8 ; RV64IM-NEXT: or a4, a4, t1 ; RV64IM-NEXT: and t1, a1, s7 -; RV64IM-NEXT: slli t5, s11, 32 +; RV64IM-NEXT: slli t3, s11, 32 ; RV64IM-NEXT: slli a3, a3, 24 ; RV64IM-NEXT: or s9, a3, t4 ; RV64IM-NEXT: slli a3, s3, 32 @@ -6034,14 +6055,14 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: slli a3, s6, 32 ; RV64IM-NEXT: add s6, s6, a3 ; RV64IM-NEXT: slli t4, t0, 11 -; RV64IM-NEXT: and a3, s0, t3 -; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: and a3, s0, s10 +; RV64IM-NEXT: or a3, a3, a6 ; RV64IM-NEXT: slli s11, t0, 32 -; RV64IM-NEXT: and a7, ra, s8 -; RV64IM-NEXT: or a7, a7, t2 +; RV64IM-NEXT: and a6, ra, s8 +; RV64IM-NEXT: or a6, a6, t2 ; RV64IM-NEXT: slli ra, t0, 33 ; RV64IM-NEXT: slli t1, t1, 24 -; RV64IM-NEXT: or t1, t1, t5 +; RV64IM-NEXT: or t1, t1, t3 ; RV64IM-NEXT: slli s0, t0, 34 ; RV64IM-NEXT: or a2, a2, a4 ; RV64IM-NEXT: slli a4, t0, 35 @@ -6049,15 +6070,15 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: and a0, a0, s8 ; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: or a0, a5, a0 -; RV64IM-NEXT: slli a4, t0, 36 -; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a3, a3, a7 -; RV64IM-NEXT: slli a7, t0, 37 +; RV64IM-NEXT: or a0, a7, a0 +; RV64IM-NEXT: slli a7, t0, 36 +; RV64IM-NEXT: or a3, a3, a6 +; RV64IM-NEXT: slli a6, t0, 37 ; RV64IM-NEXT: and a1, a1, s8 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a6, a1 -; RV64IM-NEXT: slli a6, t0, 38 +; RV64IM-NEXT: or a1, a5, a1 +; RV64IM-NEXT: slli a4, t0, 38 +; RV64IM-NEXT: sd a4, 288(sp) # 8-byte Folded Spill ; RV64IM-NEXT: or a0, a0, s9 ; RV64IM-NEXT: or a1, a1, t1 ; RV64IM-NEXT: or a0, a0, a2 @@ -6126,7 +6147,7 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 40 ; RV64IM-NEXT: and a2, s6, s1 ; RV64IM-NEXT: and a3, s6, s2 @@ -6203,7 +6224,8 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: lui a3, 8 ; RV64IM-NEXT: and a1, s6, a3 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s10 
+; RV64IM-NEXT: lui a1, 16 +; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui s9, 32 ; RV64IM-NEXT: and a1, s6, s9 @@ -6236,12 +6258,12 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a7 -; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a6 +; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, s3 ; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill @@ -6302,7 +6324,7 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a7, a0, a1 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload @@ -6371,7 +6393,7 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: xor t4, a0, t4 ; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor t3, a0, t3 -; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor t1, a0, t1 ; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a7, a0, a7 @@ -6411,7 +6433,7 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: xor a2, a2, t2 ; RV64IM-NEXT: xor a1, a1, s1 ; RV64IM-NEXT: xor a5, a5, s8 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor t1, t1, a0 ; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a6, a6, a0 @@ -6436,35 +6458,37 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a2, a2, a0 ; RV64IM-NEXT: xor a1, a1, s4 -; RV64IM-NEXT: lui a6, 5 -; RV64IM-NEXT: addi a6, a6, 1365 -; RV64IM-NEXT: slli t0, t0, 56 +; RV64IM-NEXT: lui a6, %hi(.LCPI10_0) ; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a0, a5, a0 -; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a5, a7, t1 +; RV64IM-NEXT: lui a5, 5 +; RV64IM-NEXT: ld a6, %lo(.LCPI10_0)(a6) +; RV64IM-NEXT: addi a5, a5, 1365 +; RV64IM-NEXT: slli t0, t0, 56 ; RV64IM-NEXT: xor a4, a7, a4 -; RV64IM-NEXT: slli a5, a5, 40 +; RV64IM-NEXT: ld t2, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a7, a7, t2 +; RV64IM-NEXT: slli a7, a7, 40 ; RV64IM-NEXT: xor a3, a4, a3 -; RV64IM-NEXT: or a4, t0, a5 -; RV64IM-NEXT: lui t0, 4080 -; RV64IM-NEXT: and a5, a3, t0 +; RV64IM-NEXT: or a4, t0, a7 +; RV64IM-NEXT: lui t1, 4080 +; RV64IM-NEXT: and a7, a3, t1 ; RV64IM-NEXT: xor a2, a3, a2 ; RV64IM-NEXT: srli a3, a3, 8 -; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: slli a7, a7, 24 ; RV64IM-NEXT: xor a1, a2, a1 -; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a3, a3, a7 +; RV64IM-NEXT: ld t0, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, t0 ; RV64IM-NEXT: srli a2, a2, 24 -; 
RV64IM-NEXT: srliw a7, a1, 24 -; RV64IM-NEXT: and a2, a2, t0 -; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: srliw t0, a1, 24 +; RV64IM-NEXT: and a2, a2, t1 +; RV64IM-NEXT: srli t1, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: slli a7, a7, 32 +; RV64IM-NEXT: slli t0, t0, 32 ; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: and a1, t1, t2 ; RV64IM-NEXT: srli a0, a0, 56 -; RV64IM-NEXT: or a3, a5, a7 +; RV64IM-NEXT: or a3, a7, t0 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: or a3, a4, a3 ; RV64IM-NEXT: or a0, a2, a0 @@ -6482,10 +6506,12 @@ define i16 @clmulr_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: slli a0, a0, 2 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: and a0, a0, a6 +; RV64IM-NEXT: and a0, a0, a5 ; RV64IM-NEXT: and a1, a1, a6 ; RV64IM-NEXT: slli a0, a0, 1 ; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 47 +; RV64IM-NEXT: srli a0, a0, 48 ; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload @@ -6530,250 +6556,253 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV32IM-NEXT: lui a3, 16 ; RV32IM-NEXT: srli t1, a0, 24 ; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui t3, 61681 -; RV32IM-NEXT: lui t5, 209715 -; RV32IM-NEXT: lui t6, 349525 +; RV32IM-NEXT: lui s1, 61681 +; RV32IM-NEXT: lui s3, 209715 +; RV32IM-NEXT: lui a6, 349525 ; RV32IM-NEXT: srli t4, a1, 8 -; RV32IM-NEXT: srli a4, a1, 24 -; RV32IM-NEXT: slli a5, a1, 24 -; RV32IM-NEXT: li s7, 1 +; RV32IM-NEXT: srli t6, a1, 24 +; RV32IM-NEXT: slli a4, a1, 24 +; RV32IM-NEXT: li t3, 1 +; RV32IM-NEXT: lui s11, 2 ; RV32IM-NEXT: lui t2, 4 -; RV32IM-NEXT: lui s0, 8 -; RV32IM-NEXT: lui s1, 32 -; RV32IM-NEXT: lui s2, 64 -; RV32IM-NEXT: lui s3, 128 +; RV32IM-NEXT: lui s10, 8 +; RV32IM-NEXT: lui t5, 32 +; RV32IM-NEXT: lui s0, 64 +; RV32IM-NEXT: lui s2, 128 ; RV32IM-NEXT: lui s4, 256 -; RV32IM-NEXT: lui s8, 512 -; RV32IM-NEXT: lui a7, 1024 -; RV32IM-NEXT: lui s9, 2048 -; RV32IM-NEXT: lui s10, 4096 -; RV32IM-NEXT: lui s11, 8192 +; RV32IM-NEXT: lui s5, 512 +; RV32IM-NEXT: lui s6, 1024 +; RV32IM-NEXT: lui s7, 2048 +; RV32IM-NEXT: lui s8, 4096 +; RV32IM-NEXT: lui s9, 8192 ; RV32IM-NEXT: lui ra, 16384 -; RV32IM-NEXT: addi s5, a3, -256 -; RV32IM-NEXT: and t0, t0, s5 +; RV32IM-NEXT: addi a3, a3, -256 +; RV32IM-NEXT: lui a5, 16 +; RV32IM-NEXT: and t0, t0, a3 ; RV32IM-NEXT: or t1, t0, t1 -; RV32IM-NEXT: lui a6, 32768 -; RV32IM-NEXT: and t4, t4, s5 -; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: lui a7, 32768 +; RV32IM-NEXT: and t4, t4, a3 +; RV32IM-NEXT: or t6, t4, t6 ; RV32IM-NEXT: lui t0, 65536 -; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: and a0, a0, a3 +; RV32IM-NEXT: mv t4, a3 +; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill ; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: lui a2, 131072 -; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: or a2, a2, a0 +; RV32IM-NEXT: lui a3, 131072 +; RV32IM-NEXT: and a1, a1, t4 ; RV32IM-NEXT: slli a1, a1, 8 -; RV32IM-NEXT: or t4, a5, a1 +; RV32IM-NEXT: or a0, a4, a1 ; RV32IM-NEXT: lui a1, 262144 -; RV32IM-NEXT: or a0, a0, t1 -; RV32IM-NEXT: lui a5, 524288 -; RV32IM-NEXT: addi t3, t3, -241 -; RV32IM-NEXT: addi t5, t5, 819 -; RV32IM-NEXT: addi t6, t6, 1365 -; RV32IM-NEXT: slli s7, s7, 11 -; RV32IM-NEXT: or a4, t4, a4 -; RV32IM-NEXT: srli t4, a0, 4 -; RV32IM-NEXT: and a0, a0, t3 -; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: addi s1, s1, -241 +; RV32IM-NEXT: addi s3, s3, 819 +; RV32IM-NEXT: or a2, a2, t1 +; 
RV32IM-NEXT: addi a4, a6, 1365 +; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: or a0, a0, t6 +; RV32IM-NEXT: srli a6, a2, 4 +; RV32IM-NEXT: and a2, a2, s1 +; RV32IM-NEXT: and a6, a6, s1 +; RV32IM-NEXT: slli a2, a2, 4 +; RV32IM-NEXT: or a2, a6, a2 +; RV32IM-NEXT: srli a6, a0, 4 +; RV32IM-NEXT: and a0, a0, s1 +; RV32IM-NEXT: and a6, a6, s1 ; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, t4, a0 -; RV32IM-NEXT: srli t4, a4, 4 -; RV32IM-NEXT: and a4, a4, t3 -; RV32IM-NEXT: and t4, t4, t3 -; RV32IM-NEXT: slli a4, a4, 4 -; RV32IM-NEXT: or a4, t4, a4 -; RV32IM-NEXT: srli t4, a0, 2 -; RV32IM-NEXT: and a0, a0, t5 -; RV32IM-NEXT: and t4, t4, t5 +; RV32IM-NEXT: or a0, a6, a0 +; RV32IM-NEXT: srli a6, a2, 2 +; RV32IM-NEXT: and a2, a2, s3 +; RV32IM-NEXT: and a6, a6, s3 +; RV32IM-NEXT: slli a2, a2, 2 +; RV32IM-NEXT: or a2, a6, a2 +; RV32IM-NEXT: srli a6, a0, 2 +; RV32IM-NEXT: and a0, a0, s3 +; RV32IM-NEXT: and a6, a6, s3 ; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, t4, a0 -; RV32IM-NEXT: srli t4, a4, 2 -; RV32IM-NEXT: and a4, a4, t5 -; RV32IM-NEXT: and t4, t4, t5 -; RV32IM-NEXT: slli a4, a4, 2 -; RV32IM-NEXT: or t4, t4, a4 -; RV32IM-NEXT: srli a4, a0, 1 -; RV32IM-NEXT: and a0, a0, t6 -; RV32IM-NEXT: and a4, a4, t6 +; RV32IM-NEXT: or a0, a6, a0 +; RV32IM-NEXT: srli a6, a2, 1 +; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: and a6, a6, a4 +; RV32IM-NEXT: slli a2, a2, 1 +; RV32IM-NEXT: or a6, a6, a2 +; RV32IM-NEXT: srli a2, a0, 1 +; RV32IM-NEXT: and a0, a0, a4 +; RV32IM-NEXT: and a2, a2, a4 ; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a4, a4, a0 -; RV32IM-NEXT: srli a0, t4, 1 -; RV32IM-NEXT: and t4, t4, t6 -; RV32IM-NEXT: and a0, a0, t6 -; RV32IM-NEXT: slli t4, t4, 1 -; RV32IM-NEXT: or a0, a0, t4 -; RV32IM-NEXT: andi t4, a0, 2 -; RV32IM-NEXT: and s6, a0, s7 -; RV32IM-NEXT: lui t1, 1 -; RV32IM-NEXT: and t1, a0, t1 -; RV32IM-NEXT: sw t1, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lui t1, 2 -; RV32IM-NEXT: and t1, a0, t1 -; RV32IM-NEXT: sw t1, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t1, a0, t2 -; RV32IM-NEXT: sw t1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: lui a2, 524288 +; RV32IM-NEXT: slli t3, t3, 11 +; RV32IM-NEXT: and t3, a0, t3 +; RV32IM-NEXT: lui a4, 1 +; RV32IM-NEXT: and t4, a0, a4 +; RV32IM-NEXT: and s11, a0, s11 +; RV32IM-NEXT: and a4, a0, t2 +; RV32IM-NEXT: sw a4, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s10 +; RV32IM-NEXT: sw a4, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, t5 +; RV32IM-NEXT: sw a4, 64(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and s0, a0, s0 -; RV32IM-NEXT: and a3, a0, a3 -; RV32IM-NEXT: sw a3, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s1, a0, s1 -; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, s2 -; RV32IM-NEXT: sw a3, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s3, a0, s3 -; RV32IM-NEXT: and a3, a0, s4 -; RV32IM-NEXT: sw a3, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, s8 -; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, a7 -; RV32IM-NEXT: sw a3, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s9, a0, s9 -; RV32IM-NEXT: and a3, a0, s10 -; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, s11 -; RV32IM-NEXT: sw a3, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, ra -; RV32IM-NEXT: sw a3, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, a6 -; RV32IM-NEXT: sw a3, 36(sp) 
# 4-byte Folded Spill -; RV32IM-NEXT: and a3, a0, t0 -; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, a2 -; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s2 +; RV32IM-NEXT: sw a4, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s4, a0, s4 +; RV32IM-NEXT: and a4, a0, s5 +; RV32IM-NEXT: sw a4, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s6 +; RV32IM-NEXT: sw a4, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s7 +; RV32IM-NEXT: sw a4, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s8 +; RV32IM-NEXT: sw a4, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, s9 +; RV32IM-NEXT: sw a4, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, ra +; RV32IM-NEXT: sw a4, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, a7 +; RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a4, a0, t0 +; RV32IM-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a3 +; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a1, a0, a1 -; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a5, a0, a5 -; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, a2 +; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: andi ra, a0, 2 ; RV32IM-NEXT: andi a1, a0, 1 ; RV32IM-NEXT: andi a2, a0, 4 ; RV32IM-NEXT: andi a3, a0, 8 -; RV32IM-NEXT: andi a5, a0, 16 -; RV32IM-NEXT: andi a6, a0, 32 +; RV32IM-NEXT: andi a4, a0, 16 +; RV32IM-NEXT: andi a5, a0, 32 ; RV32IM-NEXT: andi a7, a0, 64 ; RV32IM-NEXT: andi t0, a0, 128 ; RV32IM-NEXT: andi t1, a0, 256 ; RV32IM-NEXT: andi t2, a0, 512 ; RV32IM-NEXT: andi a0, a0, 1024 -; RV32IM-NEXT: mul t4, a4, t4 -; RV32IM-NEXT: sw t4, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul ra, a4, a1 -; RV32IM-NEXT: mul s11, a4, a2 -; RV32IM-NEXT: mul s8, a4, a3 -; RV32IM-NEXT: mul s7, a4, a5 -; RV32IM-NEXT: mul s4, a4, a6 -; RV32IM-NEXT: mul a1, a4, a7 -; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a4, t0 -; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul s2, a4, t1 -; RV32IM-NEXT: mul t2, a4, t2 -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a4, s6 -; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul ra, a6, ra +; RV32IM-NEXT: mul s10, a6, a1 +; RV32IM-NEXT: mul s9, a6, a2 +; RV32IM-NEXT: mul s5, a6, a3 +; RV32IM-NEXT: mul s6, a6, a4 +; RV32IM-NEXT: mul s2, a6, a5 +; RV32IM-NEXT: mul a1, a6, a7 +; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a6, t0 +; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t6, a6, t1 +; RV32IM-NEXT: mul t2, a6, t2 +; RV32IM-NEXT: mul s7, a6, a0 +; RV32IM-NEXT: mul a0, a6, t3 +; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a6, t4 +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t1, a6, s11 ; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t1, a4, a0 -; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a7, a4, a0 -; RV32IM-NEXT: mul s1, a4, s0 +; RV32IM-NEXT: mul a7, a6, a0 ; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul t5, 
a6, a0 ; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s8, a6, a0 ; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a3, a4, s3 +; RV32IM-NEXT: mul a0, a6, a0 +; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a6, s0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a4, a0 +; RV32IM-NEXT: mul a3, a6, a0 +; RV32IM-NEXT: mul a2, a6, s4 ; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a6, a4, a0 +; RV32IM-NEXT: mul a5, a6, a0 ; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t4, a4, a0 -; RV32IM-NEXT: mul s6, a4, s9 +; RV32IM-NEXT: mul t3, a6, a0 ; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a4, a0 +; RV32IM-NEXT: mul s4, a6, a0 ; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a5, a4, a5 -; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t0, a4, t0 -; RV32IM-NEXT: lw s0, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s0, a4, s0 -; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s3, a4, s3 -; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s9, a4, s9 -; RV32IM-NEXT: lw s10, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a4, s10 -; RV32IM-NEXT: lw s10, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor ra, ra, s10 -; RV32IM-NEXT: xor s8, s11, s8 -; RV32IM-NEXT: xor s4, s7, s4 -; RV32IM-NEXT: xor t2, s2, t2 +; RV32IM-NEXT: mul a1, a6, a0 +; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a6, a0 +; RV32IM-NEXT: lw a4, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a6, a4 +; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a6, t0 +; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t4, a6, t4 +; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a6, s0 +; RV32IM-NEXT: lw s11, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s11, a6, s11 +; RV32IM-NEXT: sw s11, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a6, a6, s11 +; RV32IM-NEXT: xor s10, s10, ra +; RV32IM-NEXT: xor s5, s9, s5 +; RV32IM-NEXT: xor s2, s6, s2 +; RV32IM-NEXT: xor t2, t6, t2 ; RV32IM-NEXT: xor a7, t1, a7 ; RV32IM-NEXT: xor a2, a3, a2 ; RV32IM-NEXT: xor a0, a1, a0 -; RV32IM-NEXT: xor a1, ra, s8 -; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, s4, a3 -; RV32IM-NEXT: lw t1, 4(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor t1, t2, t1 -; RV32IM-NEXT: xor a7, a7, s1 -; RV32IM-NEXT: xor a2, a2, a6 -; RV32IM-NEXT: xor a0, a0, a5 +; RV32IM-NEXT: xor a1, s10, s5 +; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s2, a3 +; RV32IM-NEXT: xor t1, t2, s7 +; RV32IM-NEXT: xor a7, a7, t5 +; RV32IM-NEXT: xor a2, a2, a5 +; RV32IM-NEXT: xor a0, a0, a4 ; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, t1, a3 -; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a7, a5 -; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a4, a7, s8 +; RV32IM-NEXT: xor a2, a2, 
t3 ; RV32IM-NEXT: xor a0, a0, t0 -; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a1, a1, a6 -; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a3, a6 -; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a2, a2, s6 +; RV32IM-NEXT: lw a5, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a5 +; RV32IM-NEXT: lw a5, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: lw a5, 68(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a2, a2, s4 +; RV32IM-NEXT: xor a0, a0, t4 +; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a4, a4, a5 ; RV32IM-NEXT: xor a0, a0, s0 -; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a0, a0, s3 +; RV32IM-NEXT: lui a5, 349525 +; RV32IM-NEXT: addi a5, a5, 1364 ; RV32IM-NEXT: xor a3, a1, a3 ; RV32IM-NEXT: slli a1, a1, 24 -; RV32IM-NEXT: xor a3, a3, a5 -; RV32IM-NEXT: xor a0, a0, s9 -; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a3, a3, a4 +; RV32IM-NEXT: lw a4, 80(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a0, a0, a4 -; RV32IM-NEXT: and a3, a2, s5 +; RV32IM-NEXT: xor a2, a3, a2 +; RV32IM-NEXT: xor a0, a0, a6 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a3, a2, a6 ; RV32IM-NEXT: srli a4, a2, 8 ; RV32IM-NEXT: xor a0, a2, a0 ; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, s5 +; RV32IM-NEXT: and a2, a4, a6 ; RV32IM-NEXT: srli a0, a0, 24 ; RV32IM-NEXT: or a1, a1, a3 ; RV32IM-NEXT: or a0, a2, a0 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: and a0, a0, t3 -; RV32IM-NEXT: and a1, a1, t3 +; RV32IM-NEXT: and a0, a0, s1 +; RV32IM-NEXT: and a1, a1, s1 ; RV32IM-NEXT: slli a0, a0, 4 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: and a0, a0, t5 -; RV32IM-NEXT: and a1, a1, t5 +; RV32IM-NEXT: and a0, a0, s3 +; RV32IM-NEXT: and a1, a1, s3 ; RV32IM-NEXT: slli a0, a0, 2 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: and a0, a0, t6 -; RV32IM-NEXT: and a1, a1, t6 +; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a0, a0, a2 +; RV32IM-NEXT: and a1, a1, a5 ; RV32IM-NEXT: slli a0, a0, 1 ; RV32IM-NEXT: or a0, a1, a0 +; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -6807,74 +6836,73 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: sd s10, 416(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 408(sp) # 8-byte Folded Spill ; RV64IM-NEXT: srli a3, a0, 24 -; RV64IM-NEXT: srli a7, a0, 8 +; RV64IM-NEXT: srli t0, a0, 8 ; RV64IM-NEXT: li s1, 255 -; RV64IM-NEXT: srli a6, a0, 40 +; RV64IM-NEXT: srli a5, a0, 40 ; RV64IM-NEXT: lui a4, 16 ; RV64IM-NEXT: srli t2, a0, 56 ; RV64IM-NEXT: srliw t3, a0, 24 ; RV64IM-NEXT: slli a2, a0, 56 ; RV64IM-NEXT: lui t4, 61681 -; RV64IM-NEXT: lui t6, 209715 +; RV64IM-NEXT: lui s0, 209715 ; RV64IM-NEXT: lui s9, 349525 ; RV64IM-NEXT: srli s7, a1, 24 ; RV64IM-NEXT: srli s5, a1, 8 ; RV64IM-NEXT: srli t5, a1, 40 -; RV64IM-NEXT: srli t0, a1, 56 +; RV64IM-NEXT: srli a7, a1, 56 ; RV64IM-NEXT: srliw ra, a1, 24 -; RV64IM-NEXT: slli a5, a1, 56 +; RV64IM-NEXT: slli a6, a1, 56 ; RV64IM-NEXT: li t1, 1 -; RV64IM-NEXT: lui s10, 128 +; RV64IM-NEXT: lui s11, 128 ; RV64IM-NEXT: lui s2, 256 ; RV64IM-NEXT: lui s3, 4096 -; RV64IM-NEXT: lui s0, 8192 +; 
RV64IM-NEXT: lui t6, 8192 ; RV64IM-NEXT: lui s8, 4080 ; RV64IM-NEXT: and a3, a3, s8 ; RV64IM-NEXT: slli s1, s1, 24 -; RV64IM-NEXT: addi s11, a4, -256 -; RV64IM-NEXT: and a7, a7, s1 +; RV64IM-NEXT: addi s10, a4, -256 +; RV64IM-NEXT: and t0, t0, s1 ; RV64IM-NEXT: sd s1, 400(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a3, a7, a3 -; RV64IM-NEXT: and a7, a0, s8 +; RV64IM-NEXT: or a3, t0, a3 +; RV64IM-NEXT: and t0, a0, s8 ; RV64IM-NEXT: slli t3, t3, 32 ; RV64IM-NEXT: addi s4, t4, -241 -; RV64IM-NEXT: addi s6, t6, 819 +; RV64IM-NEXT: addi s6, s0, 819 ; RV64IM-NEXT: addi a4, s9, 1365 ; RV64IM-NEXT: and t4, s7, s8 -; RV64IM-NEXT: and a6, a6, s11 -; RV64IM-NEXT: or a6, a6, t2 +; RV64IM-NEXT: and a5, a5, s10 +; RV64IM-NEXT: or a5, a5, t2 ; RV64IM-NEXT: and t2, a1, s8 -; RV64IM-NEXT: slli t6, ra, 32 -; RV64IM-NEXT: slli a7, a7, 24 -; RV64IM-NEXT: or s9, a7, t3 -; RV64IM-NEXT: slli a7, s4, 32 -; RV64IM-NEXT: add s4, s4, a7 -; RV64IM-NEXT: slli a7, s6, 32 -; RV64IM-NEXT: add s6, s6, a7 -; RV64IM-NEXT: slli s7, t1, 11 -; RV64IM-NEXT: and a7, s5, s1 -; RV64IM-NEXT: or a7, a7, t4 -; RV64IM-NEXT: slli t4, t1, 32 -; RV64IM-NEXT: and t3, t5, s11 -; RV64IM-NEXT: or t0, t3, t0 +; RV64IM-NEXT: slli s0, ra, 32 +; RV64IM-NEXT: slli t0, t0, 24 +; RV64IM-NEXT: or s9, t0, t3 +; RV64IM-NEXT: slli t0, s4, 32 +; RV64IM-NEXT: add s4, s4, t0 +; RV64IM-NEXT: slli t0, s6, 32 +; RV64IM-NEXT: add s6, s6, t0 +; RV64IM-NEXT: slli s7, t1, 11 +; RV64IM-NEXT: and t0, s5, s1 +; RV64IM-NEXT: or t0, t0, t4 +; RV64IM-NEXT: slli t4, t1, 32 +; RV64IM-NEXT: and t3, t5, s10 +; RV64IM-NEXT: or a7, t3, a7 ; RV64IM-NEXT: slli ra, t1, 33 ; RV64IM-NEXT: slli t2, t2, 24 -; RV64IM-NEXT: or t2, t2, t6 -; RV64IM-NEXT: slli s1, t1, 34 -; RV64IM-NEXT: or a3, a3, a6 -; RV64IM-NEXT: slli a6, t1, 35 -; RV64IM-NEXT: sd a6, 328(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s11, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, s11 +; RV64IM-NEXT: or t2, t2, s0 +; RV64IM-NEXT: slli s0, t1, 34 +; RV64IM-NEXT: or a3, a3, a5 +; RV64IM-NEXT: slli s1, t1, 35 +; RV64IM-NEXT: sd s10, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s10 ; RV64IM-NEXT: slli a0, a0, 40 ; RV64IM-NEXT: or a0, a2, a0 ; RV64IM-NEXT: slli a2, t1, 36 -; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, a7, t0 +; RV64IM-NEXT: sd a2, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, t0, a7 ; RV64IM-NEXT: slli a7, t1, 37 -; RV64IM-NEXT: and a1, a1, s11 +; RV64IM-NEXT: and a1, a1, s10 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a5, a1 +; RV64IM-NEXT: or a1, a6, a1 ; RV64IM-NEXT: sd a4, 392(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a5, a4, 32 ; RV64IM-NEXT: add a5, a4, a5 @@ -6946,56 +6974,57 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 40 -; RV64IM-NEXT: and a2, s5, s10 +; RV64IM-NEXT: and a2, s5, s11 ; RV64IM-NEXT: and a3, s5, s2 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 41 ; RV64IM-NEXT: and a3, s5, s3 -; RV64IM-NEXT: and a4, s5, s0 +; RV64IM-NEXT: and a4, s5, t6 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a3, 
304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a3, t1, 48 ; RV64IM-NEXT: and a4, s5, t4 ; RV64IM-NEXT: and a5, s5, ra ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a5, a0, a5 ; RV64IM-NEXT: xor a4, a4, a5 -; RV64IM-NEXT: sd a4, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, t1, 49 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: and a2, s5, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 56 ; RV64IM-NEXT: and a2, s5, a3 ; RV64IM-NEXT: and a3, s5, a4 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 57 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: and a2, s5, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 272(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli ra, t1, 39 ; RV64IM-NEXT: slli a2, t1, 42 ; RV64IM-NEXT: slli a4, t1, 43 -; RV64IM-NEXT: slli s0, t1, 44 -; RV64IM-NEXT: slli s2, t1, 45 -; RV64IM-NEXT: slli s3, t1, 46 -; RV64IM-NEXT: slli s6, t1, 47 +; RV64IM-NEXT: slli s2, t1, 44 +; RV64IM-NEXT: slli s3, t1, 45 +; RV64IM-NEXT: slli s6, t1, 46 +; RV64IM-NEXT: slli a1, t1, 47 +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 50 ; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 51 @@ -7054,43 +7083,43 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: and a5, s5, t5 ; RV64IM-NEXT: lui t6, 262144 ; RV64IM-NEXT: and a6, s5, t6 -; RV64IM-NEXT: and s11, s5, s1 -; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and t5, s5, a1 -; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, s5, s0 +; RV64IM-NEXT: and t5, s5, s1 +; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and t6, s5, a1 ; RV64IM-NEXT: and a1, s5, a7 -; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s5, s4 -; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and ra, s5, ra ; RV64IM-NEXT: and a1, s5, a2 -; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s5, a4 +; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s2 ; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s0 +; RV64IM-NEXT: and a1, s5, s3 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s2 +; RV64IM-NEXT: and a1, s5, s6 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s3 +; RV64IM-NEXT: ld a1, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s6 -; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 24(sp) # 
8-byte Folded Spill +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and s4, s5, a1 ; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload @@ -7110,13 +7139,13 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: srli t3, s5, 63 ; RV64IM-NEXT: mul s2, a0, a1 ; RV64IM-NEXT: mul a1, a0, a2 -; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s0, a0, a3 ; RV64IM-NEXT: mul a1, a0, t1 ; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t4, a0, a1 ; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload @@ -7124,16 +7153,16 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t1, a0, a1 ; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul s3, a0, a1 ; RV64IM-NEXT: mul a1, a0, t0 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a7, a0, t2 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul s1, a0, a1 @@ -7141,47 +7170,46 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a5 -; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a6 -; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, a4, 31 ; RV64IM-NEXT: mul a5, a0, s11 ; RV64IM-NEXT: mul t2, a0, t5 ; RV64IM-NEXT: mul s11, a0, t6 -; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, ra -; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte 
Folded Spill +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a2, a0, a1 -; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a6, a0, a1 -; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t5, a0, a1 -; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul s5, a0, a1 -; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: ld a3, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a3, 32(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a3, a0, a3 -; RV64IM-NEXT: ld t0, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t0, 24(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t0, a0, t0 -; RV64IM-NEXT: ld t6, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t6, 16(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t6, a0, t6 -; RV64IM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul ra, a0, ra ; RV64IM-NEXT: mul s4, a0, s4 -; RV64IM-NEXT: sd s4, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s6, a0, s6 ; RV64IM-NEXT: mul s7, a0, s7 ; RV64IM-NEXT: mul s8, a0, s8 @@ -7190,80 +7218,83 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: slli t3, t3, 63 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a0, a0, t3 +; RV64IM-NEXT: sd a0, 320(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld t3, 360(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld s4, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, s4 -; RV64IM-NEXT: ld s4, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s2, s4, s2 -; RV64IM-NEXT: ld s4, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, s4, s0 -; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, s4, t4 -; RV64IM-NEXT: ld s4, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, s4, t1 -; RV64IM-NEXT: ld s4, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, s4, a7 -; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, s4, a5 -; RV64IM-NEXT: ld s4, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, s4, a2 -; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, s4, a1 -; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s6, s4, s6 +; RV64IM-NEXT: ld a0, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, a0 +; RV64IM-NEXT: ld a0, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, a0, s2 +; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, a0, s0 +; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, a0, t4 +; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, a0, t1 +; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a0, a7 +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a0, a5 +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload 
+; RV64IM-NEXT: xor a2, a0, a2 +; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a0, a1 +; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, a0, s6 ; RV64IM-NEXT: xor t3, t3, s2 -; RV64IM-NEXT: ld s2, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, s0, s2 -; RV64IM-NEXT: ld s2, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, t4, s2 +; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, a0 +; RV64IM-NEXT: ld a0, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, t4, a0 ; RV64IM-NEXT: xor t1, t1, s3 ; RV64IM-NEXT: xor a7, a7, s1 ; RV64IM-NEXT: xor a5, a5, t2 ; RV64IM-NEXT: xor a2, a2, a6 ; RV64IM-NEXT: xor a1, a1, a3 ; RV64IM-NEXT: xor a3, s6, s7 -; RV64IM-NEXT: ld a6, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, t3, a6 -; RV64IM-NEXT: ld t2, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t2, s0, t2 -; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t4, t3 -; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, t4 -; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, t3, a0 +; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s0, a0 +; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t4, a0 +; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 ; RV64IM-NEXT: xor a5, a5, s11 ; RV64IM-NEXT: xor a2, a2, t5 ; RV64IM-NEXT: xor a1, a1, t0 ; RV64IM-NEXT: xor a3, a3, s8 -; RV64IM-NEXT: ld t0, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t3, t0 -; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, t3 -; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, t3 +; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t3, a0 +; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 ; RV64IM-NEXT: xor a2, a2, s5 ; RV64IM-NEXT: xor a1, a1, t6 ; RV64IM-NEXT: xor a3, a3, s9 ; RV64IM-NEXT: xor t2, a6, t2 ; RV64IM-NEXT: xor t0, t2, t0 -; RV64IM-NEXT: ld t2, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, t2 -; RV64IM-NEXT: ld t2, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, t2 -; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, t2 +; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 ; RV64IM-NEXT: xor a1, a1, ra ; RV64IM-NEXT: xor a3, a3, s10 ; RV64IM-NEXT: xor t0, t0, t1 ; RV64IM-NEXT: xor a4, a7, a4 -; RV64IM-NEXT: ld a7, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, a7 -; RV64IM-NEXT: ld a7, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a7 +; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: xor a1, a1, s4 +; RV64IM-NEXT: lui a7, %hi(.LCPI11_0) +; RV64IM-NEXT: ld a7, %lo(.LCPI11_0)(a7) ; RV64IM-NEXT: slli a6, a6, 56 -; RV64IM-NEXT: ld a7, 192(sp) # 8-byte Folded Reload -; 
RV64IM-NEXT: xor a1, a1, a7 +; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a0, a3, a0 ; RV64IM-NEXT: ld t1, 368(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a3, t0, t1 @@ -7271,8 +7302,8 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: slli a3, a3, 40 ; RV64IM-NEXT: xor a4, a4, a5 ; RV64IM-NEXT: or a3, a6, a3 -; RV64IM-NEXT: lui a7, 4080 -; RV64IM-NEXT: and a5, a4, a7 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a5, a4, t0 ; RV64IM-NEXT: xor a2, a4, a2 ; RV64IM-NEXT: srli a4, a4, 8 ; RV64IM-NEXT: slli a5, a5, 24 @@ -7281,12 +7312,12 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: and a4, a4, a6 ; RV64IM-NEXT: srli a2, a2, 24 ; RV64IM-NEXT: srliw a6, a1, 24 -; RV64IM-NEXT: and a2, a2, a7 -; RV64IM-NEXT: srli a7, a1, 40 -; RV64IM-NEXT: xor a0, a1, a0 +; RV64IM-NEXT: and a2, a2, t0 +; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: xor a0, a1, a0 ; RV64IM-NEXT: slli a6, a6, 32 ; RV64IM-NEXT: or a2, a4, a2 -; RV64IM-NEXT: and a1, a7, t1 +; RV64IM-NEXT: and a1, t0, t1 ; RV64IM-NEXT: srli a0, a0, 56 ; RV64IM-NEXT: or a4, a5, a6 ; RV64IM-NEXT: or a0, a1, a0 @@ -7308,9 +7339,11 @@ define i32 @clmulr_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: srli a1, a0, 1 ; RV64IM-NEXT: ld a2, 392(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a2 +; RV64IM-NEXT: and a1, a1, a7 ; RV64IM-NEXT: slli a0, a0, 1 ; RV64IM-NEXT: or a0, a1, a0 +; RV64IM-NEXT: slli a0, a0, 31 +; RV64IM-NEXT: srli a0, a0, 32 ; RV64IM-NEXT: ld ra, 504(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 496(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 488(sp) # 8-byte Folded Reload @@ -7594,9 +7627,8 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV32IM-NEXT: andi a1, a0, 5 ; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: slli a1, a1, 1 -; RV32IM-NEXT: andi a0, a0, 20 +; RV32IM-NEXT: andi a0, a0, 5 ; RV32IM-NEXT: or a0, a0, a1 -; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -7629,82 +7661,83 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a3, a0, 24 -; RV64IM-NEXT: srli a7, a0, 8 +; RV64IM-NEXT: srli a4, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 ; RV64IM-NEXT: li s4, 255 -; RV64IM-NEXT: srli a4, a0, 40 -; RV64IM-NEXT: lui s10, 16 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s8, 16 ; RV64IM-NEXT: srli t1, a0, 56 -; RV64IM-NEXT: srliw t4, a0, 24 -; RV64IM-NEXT: slli a5, a0, 56 +; RV64IM-NEXT: srliw t3, a0, 24 +; RV64IM-NEXT: slli t4, a0, 56 ; RV64IM-NEXT: lui s3, 61681 ; RV64IM-NEXT: lui t5, 209715 ; RV64IM-NEXT: lui s6, 349525 ; RV64IM-NEXT: srli s9, a1, 24 ; RV64IM-NEXT: srli s0, a1, 8 -; RV64IM-NEXT: srli ra, a1, 40 +; RV64IM-NEXT: srli a7, a1, 40 ; RV64IM-NEXT: srli t2, a1, 56 ; RV64IM-NEXT: srliw s11, a1, 24 -; RV64IM-NEXT: slli a6, a1, 56 +; RV64IM-NEXT: slli a3, a1, 56 ; RV64IM-NEXT: li t0, 1 ; RV64IM-NEXT: lui s1, 128 ; RV64IM-NEXT: lui s2, 256 ; RV64IM-NEXT: lui t6, 4096 ; RV64IM-NEXT: lui s5, 8192 ; RV64IM-NEXT: lui s7, 4080 -; RV64IM-NEXT: and a2, a3, s7 -; RV64IM-NEXT: slli t3, s4, 24 -; RV64IM-NEXT: addi s8, s10, -256 -; RV64IM-NEXT: and a3, a7, t3 -; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a3, a0, s7 -; RV64IM-NEXT: slli t4, t4, 32 +; 
RV64IM-NEXT: and a2, a4, s7 +; RV64IM-NEXT: slli ra, s4, 24 +; RV64IM-NEXT: addi s10, s8, -256 +; RV64IM-NEXT: and a4, a6, ra +; RV64IM-NEXT: sd ra, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a4, a2 +; RV64IM-NEXT: and a4, a0, s7 +; RV64IM-NEXT: slli t3, t3, 32 ; RV64IM-NEXT: addi s3, s3, -241 ; RV64IM-NEXT: addi s4, t5, 819 ; RV64IM-NEXT: addi s6, s6, 1365 -; RV64IM-NEXT: and a7, s9, s7 -; RV64IM-NEXT: and a4, a4, s8 -; RV64IM-NEXT: or a4, a4, t1 +; RV64IM-NEXT: and a6, s9, s7 +; RV64IM-NEXT: and a5, a5, s10 +; RV64IM-NEXT: or a5, a5, t1 ; RV64IM-NEXT: and t1, a1, s7 ; RV64IM-NEXT: slli t5, s11, 32 -; RV64IM-NEXT: slli a3, a3, 24 -; RV64IM-NEXT: or s9, a3, t4 -; RV64IM-NEXT: slli a3, s3, 32 -; RV64IM-NEXT: add s3, s3, a3 -; RV64IM-NEXT: slli a3, s4, 32 -; RV64IM-NEXT: add s4, s4, a3 -; RV64IM-NEXT: slli a3, s6, 32 -; RV64IM-NEXT: add s6, s6, a3 -; RV64IM-NEXT: slli t4, t0, 11 -; RV64IM-NEXT: and a3, s0, t3 -; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli a4, a4, 24 +; RV64IM-NEXT: or s9, a4, t3 +; RV64IM-NEXT: slli a4, s3, 32 +; RV64IM-NEXT: add s3, s3, a4 +; RV64IM-NEXT: slli a4, s4, 32 +; RV64IM-NEXT: add s4, s4, a4 +; RV64IM-NEXT: slli a4, s6, 32 +; RV64IM-NEXT: add s6, s6, a4 +; RV64IM-NEXT: slli t3, t0, 11 +; RV64IM-NEXT: and a4, s0, ra +; RV64IM-NEXT: or a4, a4, a6 ; RV64IM-NEXT: slli s11, t0, 32 -; RV64IM-NEXT: and a7, ra, s8 -; RV64IM-NEXT: or a7, a7, t2 +; RV64IM-NEXT: and a6, a7, s10 +; RV64IM-NEXT: or a6, a6, t2 ; RV64IM-NEXT: slli ra, t0, 33 ; RV64IM-NEXT: slli t1, t1, 24 -; RV64IM-NEXT: or t1, t1, t5 +; RV64IM-NEXT: or a7, t1, t5 ; RV64IM-NEXT: slli s0, t0, 34 -; RV64IM-NEXT: or a2, a2, a4 -; RV64IM-NEXT: slli a4, t0, 35 -; RV64IM-NEXT: sd a4, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, s8 -; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a2, a5 +; RV64IM-NEXT: slli a5, t0, 35 +; RV64IM-NEXT: sd a5, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s10 ; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: or a0, a5, a0 -; RV64IM-NEXT: slli a4, t0, 36 -; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a3, a3, a7 -; RV64IM-NEXT: slli a7, t0, 37 -; RV64IM-NEXT: and a1, a1, s8 +; RV64IM-NEXT: or a0, t4, a0 +; RV64IM-NEXT: slli a5, t0, 36 +; RV64IM-NEXT: sd a5, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a4, a4, a6 +; RV64IM-NEXT: slli a6, t0, 37 +; RV64IM-NEXT: and a1, a1, s10 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a6, a1 -; RV64IM-NEXT: slli a6, t0, 38 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: slli a3, t0, 38 +; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: or a0, a0, s9 -; RV64IM-NEXT: or a1, a1, t1 +; RV64IM-NEXT: or a1, a1, a7 ; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a3 +; RV64IM-NEXT: or a1, a1, a4 ; RV64IM-NEXT: srli a2, a0, 4 ; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a0, a0, s3 @@ -7761,7 +7794,7 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: xor a1, a2, a1 ; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli s3, t0, 39 +; RV64IM-NEXT: slli a7, t0, 39 ; RV64IM-NEXT: lui a1, 2 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: lui a2, 4 @@ -7776,52 +7809,51 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill ; 
RV64IM-NEXT: slli a2, t0, 41 ; RV64IM-NEXT: and a3, s6, t6 ; RV64IM-NEXT: and a4, s6, s5 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a3, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a3, t0, 48 ; RV64IM-NEXT: and a4, s6, s11 ; RV64IM-NEXT: and a5, s6, ra ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a5, a0, a5 ; RV64IM-NEXT: xor a4, a4, a5 -; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a4, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, t0, 49 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: and a2, s6, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 56 ; RV64IM-NEXT: and a2, s6, a3 ; RV64IM-NEXT: and a3, s6, a4 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 240(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 57 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: and a2, s6, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 42 ; RV64IM-NEXT: slli ra, t0, 43 ; RV64IM-NEXT: slli a4, t0, 44 ; RV64IM-NEXT: slli t6, t0, 45 ; RV64IM-NEXT: slli s1, t0, 46 ; RV64IM-NEXT: slli s2, t0, 47 -; RV64IM-NEXT: slli s4, t0, 50 -; RV64IM-NEXT: slli s5, t0, 51 -; RV64IM-NEXT: slli a1, t0, 52 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s3, t0, 50 +; RV64IM-NEXT: slli s4, t0, 51 +; RV64IM-NEXT: slli s5, t0, 52 ; RV64IM-NEXT: slli a1, t0, 53 ; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 54 @@ -7838,7 +7870,7 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli t0, t0, 62 ; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, t4 +; RV64IM-NEXT: and a1, s6, t3 ; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui a3, 1 ; RV64IM-NEXT: and a1, s6, a3 @@ -7846,7 +7878,7 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: lui a3, 8 ; RV64IM-NEXT: and a1, s6, a3 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: and a1, s6, s8 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui s9, 32 ; RV64IM-NEXT: and a1, s6, s9 @@ -7878,15 +7910,16 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: and s11, s6, s0 ; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, a7 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a6 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 ; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and 
a1, s6, a2 ; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill @@ -7899,11 +7932,10 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, s2 ; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s4 +; RV64IM-NEXT: and a1, s6, s3 ; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and s0, s6, s5 -; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s1, s6, a1 +; RV64IM-NEXT: and s0, s6, s4 +; RV64IM-NEXT: and s1, s6, s5 ; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and s2, s6, a1 ; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload @@ -7927,14 +7959,14 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: srli s6, s6, 63 ; RV64IM-NEXT: mul t4, a0, a1 ; RV64IM-NEXT: mul a1, a0, a2 -; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul t3, a0, a3 ; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t1, a0, a1 ; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload @@ -7942,17 +7974,17 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a7, a0, a1 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t6, a0, a1 ; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a6, a0, a1 ; RV64IM-NEXT: mul t5, a0, t2 @@ -7960,25 +7992,25 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a5 -; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, t0 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, a4, 31 ; RV64IM-NEXT: mul a3, a0, s11 -; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t0, a0, a1 ; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 
200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a2, a0, a1 ; RV64IM-NEXT: mul a5, a0, ra @@ -7990,7 +8022,7 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul ra, a0, a1 ; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul s0, a0, s0 @@ -7998,6 +8030,7 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul s2, a0, s2 ; RV64IM-NEXT: mul s3, a0, s3 ; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: sd s4, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s5, a0, s5 ; RV64IM-NEXT: mul s7, a0, s7 ; RV64IM-NEXT: mul s8, a0, s8 @@ -8006,83 +8039,80 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: slli s6, s6, 63 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a0, a0, s6 -; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s6, s6, a0 -; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, a0, t4 -; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, a0, t3 -; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, a0, t1 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a0, a7 -; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a0, a6 -; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a0, a3 -; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a0, a2 -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, a0, s5 +; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, s4 +; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, s4, t4 +; RV64IM-NEXT: ld s4, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, s4, t3 +; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s4, t1 +; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, s4, a7 +; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, s4, a6 +; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s4, a3 +; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, s4, a2 +; RV64IM-NEXT: ld s4, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, s4, a1 +; RV64IM-NEXT: ld s4, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, s4, s5 ; RV64IM-NEXT: xor t4, s6, t4 -; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, a0 -; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld s4, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, s4 +; RV64IM-NEXT: ld s4, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, s4 ; RV64IM-NEXT: xor a7, a7, t6 ; RV64IM-NEXT: xor a6, a6, t5 ; RV64IM-NEXT: xor a3, a3, t0 ; RV64IM-NEXT: xor a2, a2, a5 ; RV64IM-NEXT: 
xor a1, a1, s0 ; RV64IM-NEXT: xor a5, s5, s7 -; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t4, a0 -; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, a0 -; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, a0 -; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, a0 -; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, a0 -; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld t0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, t0 +; RV64IM-NEXT: ld t4, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, t4 +; RV64IM-NEXT: ld t4, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t4 +; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t4 +; RV64IM-NEXT: ld t4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t4 ; RV64IM-NEXT: xor a2, a2, t2 ; RV64IM-NEXT: xor a1, a1, s1 ; RV64IM-NEXT: xor a5, a5, s8 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, a0 -; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, a0 -; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld t2, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t2 +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t2 ; RV64IM-NEXT: xor a2, a2, s11 ; RV64IM-NEXT: xor a1, a1, s2 ; RV64IM-NEXT: xor a5, a5, s9 ; RV64IM-NEXT: xor t2, t0, t3 ; RV64IM-NEXT: xor t1, t2, t1 -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, a0 -; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld t2, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t2 +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t2 ; RV64IM-NEXT: xor a2, a2, ra ; RV64IM-NEXT: xor a1, a1, s3 ; RV64IM-NEXT: xor a5, a5, s10 ; RV64IM-NEXT: xor a7, t1, a7 ; RV64IM-NEXT: xor a4, a6, a4 -; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 -; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a0 -; RV64IM-NEXT: xor a1, a1, s4 -; RV64IM-NEXT: lui a6, %hi(.LCPI14_0) -; RV64IM-NEXT: ld a6, %lo(.LCPI14_0)(a6) +; RV64IM-NEXT: ld a6, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a6 +; RV64IM-NEXT: ld a6, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a6 ; RV64IM-NEXT: slli t0, t0, 56 -; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a6, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a6 ; RV64IM-NEXT: xor a0, a5, a0 ; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a5, a7, t1 @@ -8090,24 +8120,24 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: slli a5, a5, 40 ; RV64IM-NEXT: xor a3, a4, a3 ; RV64IM-NEXT: or a4, t0, a5 -; RV64IM-NEXT: lui t0, 4080 -; RV64IM-NEXT: and a5, a3, t0 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a5, a3, a7 ; RV64IM-NEXT: xor a2, a3, a2 ; RV64IM-NEXT: srli a3, a3, 8 ; RV64IM-NEXT: slli a5, a5, 24 ; RV64IM-NEXT: xor a1, a2, a1 -; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a3, a3, a7 +; RV64IM-NEXT: ld a6, 
368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a6 ; RV64IM-NEXT: srli a2, a2, 24 -; RV64IM-NEXT: srliw a7, a1, 24 -; RV64IM-NEXT: and a2, a2, t0 -; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: srliw a6, a1, 24 +; RV64IM-NEXT: and a2, a2, a7 +; RV64IM-NEXT: srli a7, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: slli a7, a7, 32 +; RV64IM-NEXT: slli a6, a6, 32 ; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: and a1, a7, t1 ; RV64IM-NEXT: srli a0, a0, 56 -; RV64IM-NEXT: or a3, a5, a7 +; RV64IM-NEXT: or a3, a5, a6 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: or a3, a4, a3 ; RV64IM-NEXT: or a0, a2, a0 @@ -8124,13 +8154,11 @@ define i4 @clmulh_i4(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: slli a0, a0, 2 ; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: andi a1, a0, 5 +; RV64IM-NEXT: srli a0, a0, 1 +; RV64IM-NEXT: slli a1, a1, 1 ; RV64IM-NEXT: andi a0, a0, 5 -; RV64IM-NEXT: and a1, a1, a6 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: slli a0, a0, 59 -; RV64IM-NEXT: srli a0, a0, 60 +; RV64IM-NEXT: or a0, a0, a1 ; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload @@ -8175,253 +8203,250 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV32IM-NEXT: lui a3, 16 ; RV32IM-NEXT: srli t1, a0, 24 ; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui s1, 61681 -; RV32IM-NEXT: lui s3, 209715 -; RV32IM-NEXT: lui a6, 349525 +; RV32IM-NEXT: lui t3, 61681 +; RV32IM-NEXT: lui t5, 209715 +; RV32IM-NEXT: lui t6, 349525 ; RV32IM-NEXT: srli t4, a1, 8 -; RV32IM-NEXT: srli t6, a1, 24 -; RV32IM-NEXT: slli a4, a1, 24 -; RV32IM-NEXT: li t3, 1 -; RV32IM-NEXT: lui s11, 2 +; RV32IM-NEXT: srli a4, a1, 24 +; RV32IM-NEXT: slli a5, a1, 24 +; RV32IM-NEXT: li s7, 1 ; RV32IM-NEXT: lui t2, 4 -; RV32IM-NEXT: lui s10, 8 -; RV32IM-NEXT: lui t5, 32 -; RV32IM-NEXT: lui s0, 64 -; RV32IM-NEXT: lui s2, 128 +; RV32IM-NEXT: lui s0, 8 +; RV32IM-NEXT: lui s1, 32 +; RV32IM-NEXT: lui s2, 64 +; RV32IM-NEXT: lui s3, 128 ; RV32IM-NEXT: lui s4, 256 -; RV32IM-NEXT: lui s5, 512 -; RV32IM-NEXT: lui s6, 1024 -; RV32IM-NEXT: lui s7, 2048 -; RV32IM-NEXT: lui s8, 4096 -; RV32IM-NEXT: lui s9, 8192 +; RV32IM-NEXT: lui s8, 512 +; RV32IM-NEXT: lui a7, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: lui s11, 8192 ; RV32IM-NEXT: lui ra, 16384 -; RV32IM-NEXT: addi a3, a3, -256 -; RV32IM-NEXT: lui a5, 16 -; RV32IM-NEXT: and t0, t0, a3 +; RV32IM-NEXT: addi s5, a3, -256 +; RV32IM-NEXT: and t0, t0, s5 ; RV32IM-NEXT: or t1, t0, t1 -; RV32IM-NEXT: lui a7, 32768 -; RV32IM-NEXT: and t4, t4, a3 -; RV32IM-NEXT: or t6, t4, t6 +; RV32IM-NEXT: lui a6, 32768 +; RV32IM-NEXT: and t4, t4, s5 +; RV32IM-NEXT: or a4, t4, a4 ; RV32IM-NEXT: lui t0, 65536 -; RV32IM-NEXT: and a0, a0, a3 -; RV32IM-NEXT: mv t4, a3 -; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a0, a0, s5 ; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or a2, a2, a0 -; RV32IM-NEXT: lui a3, 131072 -; RV32IM-NEXT: and a1, a1, t4 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: lui a2, 131072 +; RV32IM-NEXT: and a1, a1, s5 ; RV32IM-NEXT: slli a1, a1, 8 -; RV32IM-NEXT: or a0, a4, a1 +; RV32IM-NEXT: or t4, a5, a1 ; RV32IM-NEXT: lui a1, 262144 -; RV32IM-NEXT: addi s1, s1, -241 -; RV32IM-NEXT: addi s3, s3, 819 -; RV32IM-NEXT: or a2, a2, t1 -; RV32IM-NEXT: addi a4, a6, 1365 -; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: 
or a0, a0, t6 -; RV32IM-NEXT: srli a6, a2, 4 -; RV32IM-NEXT: and a2, a2, s1 -; RV32IM-NEXT: and a6, a6, s1 -; RV32IM-NEXT: slli a2, a2, 4 -; RV32IM-NEXT: or a2, a6, a2 -; RV32IM-NEXT: srli a6, a0, 4 -; RV32IM-NEXT: and a0, a0, s1 -; RV32IM-NEXT: and a6, a6, s1 +; RV32IM-NEXT: or a0, a0, t1 +; RV32IM-NEXT: lui a5, 524288 +; RV32IM-NEXT: addi t3, t3, -241 +; RV32IM-NEXT: addi t5, t5, 819 +; RV32IM-NEXT: addi t6, t6, 1365 +; RV32IM-NEXT: slli s7, s7, 11 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: srli t4, a0, 4 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and t4, t4, t3 ; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a6, a0 -; RV32IM-NEXT: srli a6, a2, 2 -; RV32IM-NEXT: and a2, a2, s3 -; RV32IM-NEXT: and a6, a6, s3 -; RV32IM-NEXT: slli a2, a2, 2 -; RV32IM-NEXT: or a2, a6, a2 -; RV32IM-NEXT: srli a6, a0, 2 -; RV32IM-NEXT: and a0, a0, s3 -; RV32IM-NEXT: and a6, a6, s3 +; RV32IM-NEXT: or a0, t4, a0 +; RV32IM-NEXT: srli t4, a4, 4 +; RV32IM-NEXT: and a4, a4, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a4, a4, 4 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: srli t4, a0, 2 +; RV32IM-NEXT: and a0, a0, t5 +; RV32IM-NEXT: and t4, t4, t5 ; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a6, a0 -; RV32IM-NEXT: srli a6, a2, 1 -; RV32IM-NEXT: and a2, a2, a4 -; RV32IM-NEXT: and a6, a6, a4 -; RV32IM-NEXT: slli a2, a2, 1 -; RV32IM-NEXT: or a6, a6, a2 -; RV32IM-NEXT: srli a2, a0, 1 -; RV32IM-NEXT: and a0, a0, a4 -; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: or a0, t4, a0 +; RV32IM-NEXT: srli t4, a4, 2 +; RV32IM-NEXT: and a4, a4, t5 +; RV32IM-NEXT: and t4, t4, t5 +; RV32IM-NEXT: slli a4, a4, 2 +; RV32IM-NEXT: or t4, t4, a4 +; RV32IM-NEXT: srli a4, a0, 1 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: and a4, a4, t6 ; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: lui a2, 524288 -; RV32IM-NEXT: slli t3, t3, 11 -; RV32IM-NEXT: and t3, a0, t3 -; RV32IM-NEXT: lui a4, 1 -; RV32IM-NEXT: and t4, a0, a4 -; RV32IM-NEXT: and s11, a0, s11 -; RV32IM-NEXT: and a4, a0, t2 -; RV32IM-NEXT: sw a4, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s10 -; RV32IM-NEXT: sw a4, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a5, a0, a5 -; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, t5 -; RV32IM-NEXT: sw a4, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: or a4, a4, a0 +; RV32IM-NEXT: srli a0, t4, 1 +; RV32IM-NEXT: and t4, t4, t6 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: slli t4, t4, 1 +; RV32IM-NEXT: or a0, a0, t4 +; RV32IM-NEXT: andi t4, a0, 2 +; RV32IM-NEXT: and s6, a0, s7 +; RV32IM-NEXT: lui t1, 1 +; RV32IM-NEXT: and t1, a0, t1 +; RV32IM-NEXT: sw t1, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui t1, 2 +; RV32IM-NEXT: and t1, a0, t1 +; RV32IM-NEXT: sw t1, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t1, a0, t2 +; RV32IM-NEXT: sw t1, 76(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and s0, a0, s0 -; RV32IM-NEXT: and a4, a0, s2 -; RV32IM-NEXT: sw a4, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s4, a0, s4 -; RV32IM-NEXT: and a4, a0, s5 -; RV32IM-NEXT: sw a4, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s6 -; RV32IM-NEXT: sw a4, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s7 -; RV32IM-NEXT: sw a4, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s8 -; RV32IM-NEXT: sw a4, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s9 -; RV32IM-NEXT: sw a4, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, ra -; RV32IM-NEXT: sw a4, 36(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, a7 -; 
RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, t0 -; RV32IM-NEXT: sw a4, 28(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a3, a0, a3 -; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, a1 -; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a3, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s2 +; RV32IM-NEXT: sw a3, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s3, a0, s3 +; RV32IM-NEXT: and a3, a0, s4 +; RV32IM-NEXT: sw a3, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s8 +; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a7 +; RV32IM-NEXT: sw a3, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s9, a0, s9 +; RV32IM-NEXT: and a3, a0, s10 +; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s11 +; RV32IM-NEXT: sw a3, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, ra +; RV32IM-NEXT: sw a3, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a6 +; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, t0 +; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a2, a0, a2 -; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi ra, a0, 2 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill ; RV32IM-NEXT: andi a1, a0, 1 ; RV32IM-NEXT: andi a2, a0, 4 ; RV32IM-NEXT: andi a3, a0, 8 -; RV32IM-NEXT: andi a4, a0, 16 -; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a5, a0, 16 +; RV32IM-NEXT: andi a6, a0, 32 ; RV32IM-NEXT: andi a7, a0, 64 ; RV32IM-NEXT: andi t0, a0, 128 ; RV32IM-NEXT: andi t1, a0, 256 ; RV32IM-NEXT: andi t2, a0, 512 ; RV32IM-NEXT: andi a0, a0, 1024 -; RV32IM-NEXT: mul ra, a6, ra -; RV32IM-NEXT: mul s10, a6, a1 -; RV32IM-NEXT: mul s9, a6, a2 -; RV32IM-NEXT: mul s5, a6, a3 -; RV32IM-NEXT: mul s6, a6, a4 -; RV32IM-NEXT: mul s2, a6, a5 -; RV32IM-NEXT: mul a1, a6, a7 -; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a6, t0 -; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul t6, a6, t1 -; RV32IM-NEXT: mul t2, a6, t2 -; RV32IM-NEXT: mul s7, a6, a0 -; RV32IM-NEXT: mul a0, a6, t3 -; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a6, t4 -; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul t1, a6, s11 +; RV32IM-NEXT: mul t4, a4, t4 +; RV32IM-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul ra, a4, a1 +; RV32IM-NEXT: mul s11, a4, a2 +; RV32IM-NEXT: mul s8, a4, a3 +; RV32IM-NEXT: mul s7, a4, a5 +; RV32IM-NEXT: mul s4, a4, a6 +; RV32IM-NEXT: mul a1, a4, a7 +; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a4, t0 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s2, a4, t1 +; RV32IM-NEXT: mul t2, a4, t2 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a4, s6 +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a7, a6, a0 +; RV32IM-NEXT: mul t1, a4, a0 +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, 
a4, a0 +; RV32IM-NEXT: mul s1, a4, s0 ; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t5, a6, a0 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s8, a6, a0 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a6, a0 -; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a6, s0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a3, a4, s3 ; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a3, a6, a0 -; RV32IM-NEXT: mul a2, a6, s4 +; RV32IM-NEXT: mul a2, a4, a0 ; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a5, a6, a0 +; RV32IM-NEXT: mul a6, a4, a0 ; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t3, a6, a0 +; RV32IM-NEXT: mul t4, a4, a0 +; RV32IM-NEXT: mul s6, a4, s9 ; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s4, a6, a0 +; RV32IM-NEXT: mul a1, a4, a0 ; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a6, a0 -; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a6, a0 -; RV32IM-NEXT: lw a4, 36(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a6, a4 -; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t0, a6, t0 -; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t4, a6, t4 -; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s0, a6, s0 -; RV32IM-NEXT: lw s11, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s11, a6, s11 -; RV32IM-NEXT: sw s11, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a6, a6, s11 -; RV32IM-NEXT: xor s10, s10, ra -; RV32IM-NEXT: xor s5, s9, s5 -; RV32IM-NEXT: xor s2, s6, s2 -; RV32IM-NEXT: xor t2, t6, t2 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a4, a5 +; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a4, t0 +; RV32IM-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a4, s0 +; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a4, s3 +; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s9, a4, s9 +; RV32IM-NEXT: lw s10, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a4, a4, s10 +; RV32IM-NEXT: lw s10, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor ra, ra, s10 +; RV32IM-NEXT: xor s8, s11, s8 +; RV32IM-NEXT: xor s4, s7, s4 +; RV32IM-NEXT: xor t2, s2, t2 ; RV32IM-NEXT: xor a7, t1, a7 ; RV32IM-NEXT: xor a2, a3, a2 ; RV32IM-NEXT: xor a0, a1, a0 -; RV32IM-NEXT: xor a1, s10, s5 -; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, s2, a3 -; RV32IM-NEXT: xor t1, t2, s7 -; RV32IM-NEXT: xor a7, a7, t5 -; RV32IM-NEXT: xor a2, a2, a5 -; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: xor a1, ra, s8 +; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s4, a3 +; RV32IM-NEXT: lw t1, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t2, t1 +; RV32IM-NEXT: xor a7, a7, s1 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 ; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a3, 16(sp) # 4-byte Folded Reload ; 
RV32IM-NEXT: xor a3, t1, a3 -; RV32IM-NEXT: xor a4, a7, s8 -; RV32IM-NEXT: xor a2, a2, t3 -; RV32IM-NEXT: xor a0, a0, t0 -; RV32IM-NEXT: lw a5, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a1, a1, a5 -; RV32IM-NEXT: lw a5, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a3, a5 -; RV32IM-NEXT: lw a5, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, a5 -; RV32IM-NEXT: xor a2, a2, s4 -; RV32IM-NEXT: xor a0, a0, t4 ; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a5, a7, a5 +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a6 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s6 ; RV32IM-NEXT: xor a0, a0, s0 -; RV32IM-NEXT: lui a5, 349525 -; RV32IM-NEXT: addi a5, a5, 1364 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s3 ; RV32IM-NEXT: xor a3, a1, a3 ; RV32IM-NEXT: slli a1, a1, 24 -; RV32IM-NEXT: xor a3, a3, a4 -; RV32IM-NEXT: lw a4, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: xor a0, a0, s9 ; RV32IM-NEXT: xor a2, a3, a2 -; RV32IM-NEXT: xor a0, a0, a6 -; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a3, a2, a6 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: and a3, a2, s5 ; RV32IM-NEXT: srli a4, a2, 8 ; RV32IM-NEXT: xor a0, a2, a0 ; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, a6 +; RV32IM-NEXT: and a2, a4, s5 ; RV32IM-NEXT: srli a0, a0, 24 ; RV32IM-NEXT: or a1, a1, a3 ; RV32IM-NEXT: or a0, a2, a0 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: and a0, a0, s1 -; RV32IM-NEXT: and a1, a1, s1 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and a1, a1, t3 ; RV32IM-NEXT: slli a0, a0, 4 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: and a0, a0, s3 -; RV32IM-NEXT: and a1, a1, s3 +; RV32IM-NEXT: and a0, a0, t5 +; RV32IM-NEXT: and a1, a1, t5 ; RV32IM-NEXT: slli a0, a0, 2 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a5 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: and a1, a1, t6 ; RV32IM-NEXT: slli a0, a0, 1 ; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -8454,81 +8479,80 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd s9, 408(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s10, 400(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 392(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a3, a0, 24 -; RV64IM-NEXT: srli a5, a0, 8 +; RV64IM-NEXT: srli a5, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 ; RV64IM-NEXT: li s4, 255 -; RV64IM-NEXT: srli ra, a0, 40 -; RV64IM-NEXT: lui s11, 16 -; RV64IM-NEXT: srli t0, a0, 56 +; RV64IM-NEXT: srli a4, a0, 40 +; RV64IM-NEXT: lui s10, 16 +; RV64IM-NEXT: srli a7, a0, 56 ; RV64IM-NEXT: srliw t2, a0, 24 -; RV64IM-NEXT: slli a6, a0, 56 -; RV64IM-NEXT: lui t3, 61681 -; RV64IM-NEXT: lui t4, 209715 -; RV64IM-NEXT: lui s8, 349525 +; RV64IM-NEXT: slli t3, a0, 56 +; RV64IM-NEXT: lui t4, 61681 +; RV64IM-NEXT: lui s6, 209715 +; RV64IM-NEXT: lui s5, 
349525 ; RV64IM-NEXT: srli s3, a1, 24 ; RV64IM-NEXT: srli t6, a1, 8 -; RV64IM-NEXT: srli a7, a1, 40 -; RV64IM-NEXT: srli t5, a1, 56 +; RV64IM-NEXT: srli ra, a1, 40 +; RV64IM-NEXT: srli t0, a1, 56 ; RV64IM-NEXT: srliw s7, a1, 24 -; RV64IM-NEXT: slli a4, a1, 56 +; RV64IM-NEXT: slli a3, a1, 56 ; RV64IM-NEXT: li t1, 1 ; RV64IM-NEXT: lui s1, 256 ; RV64IM-NEXT: lui s2, 4096 ; RV64IM-NEXT: lui s0, 8192 ; RV64IM-NEXT: lui s9, 4080 -; RV64IM-NEXT: and a2, a3, s9 -; RV64IM-NEXT: slli s5, s4, 24 -; RV64IM-NEXT: addi s10, s11, -256 -; RV64IM-NEXT: and a3, a5, s5 -; RV64IM-NEXT: sd s5, 384(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a3, a0, s9 +; RV64IM-NEXT: and a2, a5, s9 +; RV64IM-NEXT: slli t5, s4, 24 +; RV64IM-NEXT: addi s11, s10, -256 +; RV64IM-NEXT: and a5, a6, t5 +; RV64IM-NEXT: sd t5, 384(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a5, a2 +; RV64IM-NEXT: and a5, a0, s9 ; RV64IM-NEXT: slli t2, t2, 32 -; RV64IM-NEXT: addi s4, t3, -241 -; RV64IM-NEXT: addi s6, t4, 819 -; RV64IM-NEXT: addi s8, s8, 1365 -; RV64IM-NEXT: and a5, s3, s9 -; RV64IM-NEXT: and t3, ra, s10 -; RV64IM-NEXT: or t0, t3, t0 -; RV64IM-NEXT: and t3, a1, s9 +; RV64IM-NEXT: addi s4, t4, -241 +; RV64IM-NEXT: addi s6, s6, 819 +; RV64IM-NEXT: addi s8, s5, 1365 +; RV64IM-NEXT: and a6, s3, s9 +; RV64IM-NEXT: and a4, a4, s11 +; RV64IM-NEXT: or a4, a4, a7 +; RV64IM-NEXT: and a7, a1, s9 ; RV64IM-NEXT: slli t4, s7, 32 -; RV64IM-NEXT: slli a3, a3, 24 -; RV64IM-NEXT: or s3, a3, t2 -; RV64IM-NEXT: slli a3, s4, 32 -; RV64IM-NEXT: add s4, s4, a3 -; RV64IM-NEXT: slli a3, s6, 32 -; RV64IM-NEXT: add s6, s6, a3 -; RV64IM-NEXT: slli a3, s8, 32 -; RV64IM-NEXT: add s8, s8, a3 -; RV64IM-NEXT: slli s7, t1, 11 -; RV64IM-NEXT: and a3, t6, s5 -; RV64IM-NEXT: or a3, a3, a5 +; RV64IM-NEXT: slli a5, a5, 24 +; RV64IM-NEXT: or s5, a5, t2 +; RV64IM-NEXT: slli a5, s4, 32 +; RV64IM-NEXT: add s4, s4, a5 +; RV64IM-NEXT: slli a5, s6, 32 +; RV64IM-NEXT: add s6, s6, a5 +; RV64IM-NEXT: slli a5, s8, 32 +; RV64IM-NEXT: add s8, s8, a5 +; RV64IM-NEXT: slli s3, t1, 11 +; RV64IM-NEXT: and a5, t6, t5 +; RV64IM-NEXT: or a5, a5, a6 ; RV64IM-NEXT: slli t2, t1, 32 -; RV64IM-NEXT: and a5, a7, s10 -; RV64IM-NEXT: or a5, a5, t5 -; RV64IM-NEXT: slli ra, t1, 33 -; RV64IM-NEXT: slli t3, t3, 24 -; RV64IM-NEXT: or a7, t3, t4 -; RV64IM-NEXT: slli t3, t1, 34 -; RV64IM-NEXT: sd t3, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, a2, t0 -; RV64IM-NEXT: slli t0, t1, 35 -; RV64IM-NEXT: sd t0, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: sd s10, 352(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: and a6, ra, s11 +; RV64IM-NEXT: or a6, a6, t0 +; RV64IM-NEXT: slli ra, t1, 33 +; RV64IM-NEXT: slli a7, a7, 24 +; RV64IM-NEXT: or a7, a7, t4 +; RV64IM-NEXT: slli s7, t1, 34 +; RV64IM-NEXT: or a2, a2, a4 +; RV64IM-NEXT: slli a4, t1, 35 +; RV64IM-NEXT: sd a4, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s11 +; RV64IM-NEXT: sd s11, 352(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: or a0, a6, a0 -; RV64IM-NEXT: slli a6, t1, 36 -; RV64IM-NEXT: or a3, a3, a5 -; RV64IM-NEXT: slli a5, t1, 37 -; RV64IM-NEXT: sd a5, 288(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, a1, s10 +; RV64IM-NEXT: or a0, t3, a0 +; RV64IM-NEXT: slli a4, t1, 36 +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a4, a5, a6 +; RV64IM-NEXT: slli a6, t1, 37 +; RV64IM-NEXT: and a1, a1, s11 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a4, a1 -; RV64IM-NEXT: or a0, a0, s3 +; RV64IM-NEXT: or a1, a3, a1 +; 
RV64IM-NEXT: or a0, a0, s5 ; RV64IM-NEXT: or a1, a1, a7 ; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a3 +; RV64IM-NEXT: or a1, a1, a4 ; RV64IM-NEXT: srli a2, a0, 4 ; RV64IM-NEXT: sd s4, 376(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a0, a0, s4 @@ -8586,7 +8610,7 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: xor a1, a2, a1 ; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli s3, t1, 38 +; RV64IM-NEXT: slli s4, t1, 38 ; RV64IM-NEXT: lui a1, 2 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: lui a2, 4 @@ -8594,7 +8618,7 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 40 ; RV64IM-NEXT: lui a2, 128 ; RV64IM-NEXT: and a2, s5, a2 @@ -8602,42 +8626,42 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 288(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 41 ; RV64IM-NEXT: and a3, s5, s2 ; RV64IM-NEXT: and a4, s5, s0 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a3, t1, 48 ; RV64IM-NEXT: and a4, s5, t2 ; RV64IM-NEXT: and a5, s5, ra ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a5, a0, a5 ; RV64IM-NEXT: xor a4, a4, a5 -; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a4, 272(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, t1, 49 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: and a2, s5, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 56 ; RV64IM-NEXT: and a2, s5, a3 ; RV64IM-NEXT: and a3, s5, a4 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 57 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: and a2, s5, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 39 ; RV64IM-NEXT: slli ra, t1, 42 ; RV64IM-NEXT: slli a4, t1, 43 @@ -8645,8 +8669,9 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: slli s0, t1, 45 ; RV64IM-NEXT: slli s1, t1, 46 ; RV64IM-NEXT: slli s2, t1, 47 -; RV64IM-NEXT: slli s4, t1, 50 -; RV64IM-NEXT: slli s6, t1, 51 +; RV64IM-NEXT: slli s6, t1, 50 +; RV64IM-NEXT: slli a1, t1, 51 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 52 ; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 53 @@ -8665,14 +8690,14 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli t1, t1, 62 ; RV64IM-NEXT: sd t1, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and t1, s5, s7 +; RV64IM-NEXT: and t1, s5, s3 ; RV64IM-NEXT: lui a3, 1 ; RV64IM-NEXT: and a1, 
s5, a3 ; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui a3, 8 ; RV64IM-NEXT: and a1, s5, a3 ; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s11 +; RV64IM-NEXT: and a1, s5, s10 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui s8, 32 ; RV64IM-NEXT: and a1, s5, s8 @@ -8701,34 +8726,34 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: and a7, s5, t5 ; RV64IM-NEXT: lui t6, 262144 ; RV64IM-NEXT: and t6, s5, t6 +; RV64IM-NEXT: and s11, s5, s7 ; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s11, s5, a1 -; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, a6 ; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s3 +; RV64IM-NEXT: and a1, s5, a6 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, a2 +; RV64IM-NEXT: and a1, s5, s4 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, a2 +; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and ra, s5, ra ; RV64IM-NEXT: and a1, s5, a4 -; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, a5 ; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s0 +; RV64IM-NEXT: and a1, s5, a5 ; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s1 +; RV64IM-NEXT: and a1, s5, s0 ; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s2 +; RV64IM-NEXT: and a1, s5, s1 ; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s4 +; RV64IM-NEXT: and a1, s5, s2 ; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and s0, s5, s6 +; RV64IM-NEXT: and a1, s5, s6 +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s0, s5, a1 ; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and s1, s5, a1 ; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload @@ -8754,13 +8779,13 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: srli s5, s5, 63 ; RV64IM-NEXT: mul t4, a0, a1 ; RV64IM-NEXT: mul a1, a0, a2 -; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul t2, a0, a3 ; RV64IM-NEXT: mul a1, a0, t1 ; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t0, a0, a1 ; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload @@ -8768,17 +8793,17 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: sd a1, 112(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill ; 
RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a6, a0, a1 ; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t5, a0, a1 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 104(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a5, a0, a1 ; RV64IM-NEXT: mul t3, a0, t3 @@ -8786,44 +8811,45 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a7 -; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, t6 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, a4, 31 ; RV64IM-NEXT: mul a2, a0, s11 -; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a7, a0, a1 ; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul s11, a0, a1 +; RV64IM-NEXT: mul a7, a0, a1 ; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s11, a0, a1 ; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill -; RV64IM-NEXT: mul ra, a0, ra +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a3, a0, a1 +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul ra, a0, ra ; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t1, a0, a1 +; RV64IM-NEXT: mul a3, a0, a1 ; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t6, a0, a1 +; RV64IM-NEXT: mul t1, a0, a1 ; RV64IM-NEXT: ld a1, 24(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul t6, a0, a1 ; RV64IM-NEXT: ld a1, 16(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul s0, a0, s0 ; RV64IM-NEXT: mul s1, a0, s1 ; RV64IM-NEXT: mul s2, a0, s2 ; RV64IM-NEXT: mul s3, a0, s3 ; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: sd s4, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s6, a0, s6 ; RV64IM-NEXT: mul s7, a0, s7 ; RV64IM-NEXT: mul s8, a0, s8 @@ -8832,108 +8858,105 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: slli s5, s5, 63 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a0, a0, s5 -; RV64IM-NEXT: sd a0, 312(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld s5, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, s5, a0 -; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, a0, t4 -; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor 
t2, a0, t2 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, a0, t0 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a0, a6 -; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a0, a5 -; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a0, a2 -; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor ra, a0, ra -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s6, a0, s6 +; RV64IM-NEXT: ld s4, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, s5, s4 +; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, s4, t4 +; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s4, t2 +; RV64IM-NEXT: ld s4, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, s4, t0 +; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, s4, a6 +; RV64IM-NEXT: ld s4, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s4, a5 +; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, s4, a2 +; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor ra, s4, ra +; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, s4, a1 +; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s4, s6 ; RV64IM-NEXT: xor t4, s5, t4 -; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t2, t2, a0 -; RV64IM-NEXT: ld a0, 112(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t0, a0 +; RV64IM-NEXT: ld s4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, s4 +; RV64IM-NEXT: ld s4, 112(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, s4 ; RV64IM-NEXT: xor a6, a6, t5 ; RV64IM-NEXT: xor a5, a5, t3 ; RV64IM-NEXT: xor a2, a2, a7 ; RV64IM-NEXT: xor a3, ra, a3 ; RV64IM-NEXT: xor a1, a1, s0 ; RV64IM-NEXT: xor a7, s6, s7 -; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t4, a0 -; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t2, t2, a0 -; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t0, a0 -; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, a0 -; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld t3, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t4, t3 +; RV64IM-NEXT: ld t4, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, t2, t4 +; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, t4 +; RV64IM-NEXT: ld t4, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t4 +; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t4 ; RV64IM-NEXT: xor a2, a2, s11 ; RV64IM-NEXT: xor a3, a3, t1 ; RV64IM-NEXT: xor a1, a1, s1 ; RV64IM-NEXT: xor a7, a7, s8 -; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t0, a0 -; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, a0 -; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: ld t1, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t0, t1 +; RV64IM-NEXT: ld t1, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t1 +; RV64IM-NEXT: ld t1, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 ; RV64IM-NEXT: xor a3, a3, t6 ; RV64IM-NEXT: xor a1, a1, s2 ; 
RV64IM-NEXT: xor a7, a7, s9 ; RV64IM-NEXT: xor t1, t3, t2 ; RV64IM-NEXT: xor t0, t1, t0 -; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, a0 -; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a0 -; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld t1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t1 +; RV64IM-NEXT: ld t1, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t1 +; RV64IM-NEXT: ld t1, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t1 ; RV64IM-NEXT: xor a1, a1, s3 ; RV64IM-NEXT: xor a7, a7, s10 ; RV64IM-NEXT: xor a6, t0, a6 ; RV64IM-NEXT: xor a4, a5, a4 -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a0 -; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 -; RV64IM-NEXT: xor a1, a1, s4 -; RV64IM-NEXT: lui a5, %hi(.LCPI15_0) -; RV64IM-NEXT: ld a5, %lo(.LCPI15_0)(a5) +; RV64IM-NEXT: ld a5, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a5 +; RV64IM-NEXT: ld a5, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a5 ; RV64IM-NEXT: slli t3, t3, 56 -; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a5 ; RV64IM-NEXT: xor a0, a7, a0 -; RV64IM-NEXT: ld t1, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a7, a6, t1 +; RV64IM-NEXT: ld t0, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a5, a6, t0 ; RV64IM-NEXT: xor a4, a6, a4 -; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: slli a5, a5, 40 ; RV64IM-NEXT: xor a2, a4, a2 -; RV64IM-NEXT: or a4, t3, a7 -; RV64IM-NEXT: lui t0, 4080 -; RV64IM-NEXT: and a6, a2, t0 +; RV64IM-NEXT: or a4, t3, a5 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a5, a2, a7 ; RV64IM-NEXT: xor a3, a2, a3 ; RV64IM-NEXT: srli a2, a2, 8 -; RV64IM-NEXT: slli a6, a6, 24 +; RV64IM-NEXT: slli a5, a5, 24 ; RV64IM-NEXT: xor a1, a3, a1 -; RV64IM-NEXT: ld a7, 384(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a2, a2, a7 +; RV64IM-NEXT: ld a6, 384(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a2, a2, a6 ; RV64IM-NEXT: srli a3, a3, 24 -; RV64IM-NEXT: srliw a7, a1, 24 -; RV64IM-NEXT: and a3, a3, t0 -; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: srliw a6, a1, 24 +; RV64IM-NEXT: and a3, a3, a7 +; RV64IM-NEXT: srli a7, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: slli a7, a7, 32 +; RV64IM-NEXT: slli a6, a6, 32 ; RV64IM-NEXT: or a2, a2, a3 -; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: and a1, a7, t0 ; RV64IM-NEXT: srli a0, a0, 56 -; RV64IM-NEXT: or a3, a6, a7 +; RV64IM-NEXT: or a3, a5, a6 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: or a3, a4, a3 ; RV64IM-NEXT: or a0, a2, a0 @@ -8953,10 +8976,9 @@ define i4 @clmulh_i4_bitreverse(i4 %a, i4 %b) nounwind { ; RV64IM-NEXT: srli a1, a0, 1 ; RV64IM-NEXT: ld a2, 360(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a5 +; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: slli a0, a0, 1 ; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a0, a0, 1 ; RV64IM-NEXT: ld ra, 488(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 480(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 472(sp) # 8-byte Folded Reload @@ -9241,9 +9263,8 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV32IM-NEXT: andi a1, a0, 85 ; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: slli a1, a1, 1 -; RV32IM-NEXT: andi a0, a0, 340 +; RV32IM-NEXT: andi a0, a0, 85 ; RV32IM-NEXT: or a0, a0, a1 -; RV32IM-NEXT: srli a0, a0, 1 ; 
RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -9276,82 +9297,83 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: sd s9, 392(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill -; RV64IM-NEXT: srli a3, a0, 24 -; RV64IM-NEXT: srli a7, a0, 8 +; RV64IM-NEXT: srli a4, a0, 24 +; RV64IM-NEXT: srli a6, a0, 8 ; RV64IM-NEXT: li s4, 255 -; RV64IM-NEXT: srli a4, a0, 40 -; RV64IM-NEXT: lui s10, 16 +; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: lui s8, 16 ; RV64IM-NEXT: srli t1, a0, 56 -; RV64IM-NEXT: srliw t4, a0, 24 -; RV64IM-NEXT: slli a5, a0, 56 +; RV64IM-NEXT: srliw t3, a0, 24 +; RV64IM-NEXT: slli t4, a0, 56 ; RV64IM-NEXT: lui s3, 61681 ; RV64IM-NEXT: lui t5, 209715 ; RV64IM-NEXT: lui s6, 349525 ; RV64IM-NEXT: srli s9, a1, 24 ; RV64IM-NEXT: srli s0, a1, 8 -; RV64IM-NEXT: srli ra, a1, 40 +; RV64IM-NEXT: srli a7, a1, 40 ; RV64IM-NEXT: srli t2, a1, 56 ; RV64IM-NEXT: srliw s11, a1, 24 -; RV64IM-NEXT: slli a6, a1, 56 +; RV64IM-NEXT: slli a3, a1, 56 ; RV64IM-NEXT: li t0, 1 ; RV64IM-NEXT: lui s1, 128 ; RV64IM-NEXT: lui s2, 256 ; RV64IM-NEXT: lui t6, 4096 ; RV64IM-NEXT: lui s5, 8192 ; RV64IM-NEXT: lui s7, 4080 -; RV64IM-NEXT: and a2, a3, s7 -; RV64IM-NEXT: slli t3, s4, 24 -; RV64IM-NEXT: addi s8, s10, -256 -; RV64IM-NEXT: and a3, a7, t3 -; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a3, a0, s7 -; RV64IM-NEXT: slli t4, t4, 32 +; RV64IM-NEXT: and a2, a4, s7 +; RV64IM-NEXT: slli ra, s4, 24 +; RV64IM-NEXT: addi s10, s8, -256 +; RV64IM-NEXT: and a4, a6, ra +; RV64IM-NEXT: sd ra, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a4, a2 +; RV64IM-NEXT: and a4, a0, s7 +; RV64IM-NEXT: slli t3, t3, 32 ; RV64IM-NEXT: addi s3, s3, -241 ; RV64IM-NEXT: addi s4, t5, 819 ; RV64IM-NEXT: addi s6, s6, 1365 -; RV64IM-NEXT: and a7, s9, s7 -; RV64IM-NEXT: and a4, a4, s8 -; RV64IM-NEXT: or a4, a4, t1 +; RV64IM-NEXT: and a6, s9, s7 +; RV64IM-NEXT: and a5, a5, s10 +; RV64IM-NEXT: or a5, a5, t1 ; RV64IM-NEXT: and t1, a1, s7 ; RV64IM-NEXT: slli t5, s11, 32 -; RV64IM-NEXT: slli a3, a3, 24 -; RV64IM-NEXT: or s9, a3, t4 -; RV64IM-NEXT: slli a3, s3, 32 -; RV64IM-NEXT: add s3, s3, a3 -; RV64IM-NEXT: slli a3, s4, 32 -; RV64IM-NEXT: add s4, s4, a3 -; RV64IM-NEXT: slli a3, s6, 32 -; RV64IM-NEXT: add s6, s6, a3 -; RV64IM-NEXT: slli t4, t0, 11 -; RV64IM-NEXT: and a3, s0, t3 -; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli a4, a4, 24 +; RV64IM-NEXT: or s9, a4, t3 +; RV64IM-NEXT: slli a4, s3, 32 +; RV64IM-NEXT: add s3, s3, a4 +; RV64IM-NEXT: slli a4, s4, 32 +; RV64IM-NEXT: add s4, s4, a4 +; RV64IM-NEXT: slli a4, s6, 32 +; RV64IM-NEXT: add s6, s6, a4 +; RV64IM-NEXT: slli t3, t0, 11 +; RV64IM-NEXT: and a4, s0, ra +; RV64IM-NEXT: or a4, a4, a6 ; RV64IM-NEXT: slli s11, t0, 32 -; RV64IM-NEXT: and a7, ra, s8 -; RV64IM-NEXT: or a7, a7, t2 +; RV64IM-NEXT: and a6, a7, s10 +; RV64IM-NEXT: or a6, a6, t2 ; RV64IM-NEXT: slli ra, t0, 33 ; RV64IM-NEXT: slli t1, t1, 24 -; RV64IM-NEXT: or t1, t1, t5 +; RV64IM-NEXT: or a7, t1, t5 ; RV64IM-NEXT: slli s0, t0, 34 -; RV64IM-NEXT: or a2, a2, a4 -; RV64IM-NEXT: slli a4, t0, 35 -; RV64IM-NEXT: sd a4, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, s8 -; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a2, a5 +; RV64IM-NEXT: slli a5, t0, 35 +; RV64IM-NEXT: sd a5, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s10, 
344(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s10 ; RV64IM-NEXT: slli a0, a0, 40 -; RV64IM-NEXT: or a0, a5, a0 -; RV64IM-NEXT: slli a4, t0, 36 -; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a3, a3, a7 -; RV64IM-NEXT: slli a7, t0, 37 -; RV64IM-NEXT: and a1, a1, s8 +; RV64IM-NEXT: or a0, t4, a0 +; RV64IM-NEXT: slli a5, t0, 36 +; RV64IM-NEXT: sd a5, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a4, a4, a6 +; RV64IM-NEXT: slli a6, t0, 37 +; RV64IM-NEXT: and a1, a1, s10 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a6, a1 -; RV64IM-NEXT: slli a6, t0, 38 +; RV64IM-NEXT: or a1, a3, a1 +; RV64IM-NEXT: slli a3, t0, 38 +; RV64IM-NEXT: sd a3, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: or a0, a0, s9 -; RV64IM-NEXT: or a1, a1, t1 +; RV64IM-NEXT: or a1, a1, a7 ; RV64IM-NEXT: or a0, a0, a2 -; RV64IM-NEXT: or a1, a1, a3 +; RV64IM-NEXT: or a1, a1, a4 ; RV64IM-NEXT: srli a2, a0, 4 ; RV64IM-NEXT: sd s3, 360(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a0, a0, s3 @@ -9408,7 +9430,7 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: xor a1, a2, a1 ; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill -; RV64IM-NEXT: slli s3, t0, 39 +; RV64IM-NEXT: slli a7, t0, 39 ; RV64IM-NEXT: lui a1, 2 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: lui a2, 4 @@ -9423,52 +9445,51 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 41 ; RV64IM-NEXT: and a3, s6, t6 ; RV64IM-NEXT: and a4, s6, s5 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a3, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a3, t0, 48 ; RV64IM-NEXT: and a4, s6, s11 ; RV64IM-NEXT: and a5, s6, ra ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a5, a0, a5 ; RV64IM-NEXT: xor a4, a4, a5 -; RV64IM-NEXT: sd a4, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a4, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, t0, 49 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: and a2, s6, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 56 ; RV64IM-NEXT: and a2, s6, a3 ; RV64IM-NEXT: and a3, s6, a4 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 240(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 57 ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: and a2, s6, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t0, 42 ; RV64IM-NEXT: slli ra, t0, 43 ; RV64IM-NEXT: slli a4, t0, 44 ; RV64IM-NEXT: slli t6, t0, 45 ; RV64IM-NEXT: slli s1, t0, 46 ; RV64IM-NEXT: slli s2, t0, 47 -; RV64IM-NEXT: slli s4, t0, 50 -; RV64IM-NEXT: slli s5, t0, 51 -; RV64IM-NEXT: slli a1, t0, 52 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s3, t0, 50 +; RV64IM-NEXT: slli s4, t0, 51 +; RV64IM-NEXT: slli s5, t0, 52 ; RV64IM-NEXT: slli a1, t0, 53 ; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; 
RV64IM-NEXT: slli a1, t0, 54 @@ -9485,7 +9506,7 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli t0, t0, 62 ; RV64IM-NEXT: sd t0, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, t4 +; RV64IM-NEXT: and a1, s6, t3 ; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui a3, 1 ; RV64IM-NEXT: and a1, s6, a3 @@ -9493,7 +9514,7 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: lui a3, 8 ; RV64IM-NEXT: and a1, s6, a3 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s10 +; RV64IM-NEXT: and a1, s6, s8 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui s9, 32 ; RV64IM-NEXT: and a1, s6, s9 @@ -9525,15 +9546,16 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: and s11, s6, s0 ; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 -; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, a7 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a6 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s3 +; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s6, a7 ; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, a2 ; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill @@ -9546,11 +9568,10 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, s2 ; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, s4 +; RV64IM-NEXT: and a1, s6, s3 ; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and s0, s6, s5 -; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and s1, s6, a1 +; RV64IM-NEXT: and s0, s6, s4 +; RV64IM-NEXT: and s1, s6, s5 ; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and s2, s6, a1 ; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload @@ -9574,14 +9595,14 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: srli s6, s6, 63 ; RV64IM-NEXT: mul t4, a0, a1 ; RV64IM-NEXT: mul a1, a0, a2 -; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul t3, a0, a3 ; RV64IM-NEXT: ld a1, 192(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 136(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t1, a0, a1 ; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload @@ -9589,17 +9610,17 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: sd a1, 104(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 152(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill ; 
RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a7, a0, a1 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t6, a0, a1 ; RV64IM-NEXT: ld a1, 112(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a6, a0, a1 ; RV64IM-NEXT: mul t5, a0, t2 @@ -9607,25 +9628,25 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a5 -; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, t0 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, a4, 31 ; RV64IM-NEXT: mul a3, a0, s11 -; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t0, a0, a1 ; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul t0, a0, a1 +; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 144(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 280(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a2, a0, a1 ; RV64IM-NEXT: mul a5, a0, ra @@ -9637,7 +9658,7 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul ra, a0, a1 ; RV64IM-NEXT: ld a1, 8(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 0(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul s0, a0, s0 @@ -9645,6 +9666,7 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: mul s2, a0, s2 ; RV64IM-NEXT: mul s3, a0, s3 ; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: sd s4, 168(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s5, a0, s5 ; RV64IM-NEXT: mul s7, a0, s7 ; RV64IM-NEXT: mul s8, a0, s8 @@ -9653,83 +9675,80 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: slli s6, s6, 63 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a0, a0, s6 -; RV64IM-NEXT: sd a0, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld s6, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s6, s6, a0 -; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, a0, t4 -; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, a0, t3 -; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, a0, t1 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a0, a7 -; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a0, a6 -; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a0, a3 -; 
RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a0, a2 -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s5, a0, s5 +; RV64IM-NEXT: ld s4, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s6, s4 +; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, s4, t4 +; RV64IM-NEXT: ld s4, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, s4, t3 +; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s4, t1 +; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, s4, a7 +; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, s4, a6 +; RV64IM-NEXT: ld s4, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, s4, a3 +; RV64IM-NEXT: ld s4, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, s4, a2 +; RV64IM-NEXT: ld s4, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, s4, a1 +; RV64IM-NEXT: ld s4, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s5, s4, s5 ; RV64IM-NEXT: xor t4, s6, t4 -; RV64IM-NEXT: ld a0, 136(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, a0 -; RV64IM-NEXT: ld a0, 104(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, a0 +; RV64IM-NEXT: ld s4, 136(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, s4 +; RV64IM-NEXT: ld s4, 104(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, s4 ; RV64IM-NEXT: xor a7, a7, t6 ; RV64IM-NEXT: xor a6, a6, t5 ; RV64IM-NEXT: xor a3, a3, t0 ; RV64IM-NEXT: xor a2, a2, a5 ; RV64IM-NEXT: xor a1, a1, s0 ; RV64IM-NEXT: xor a5, s5, s7 -; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t4, a0 -; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, a0 -; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, a0 -; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, a0 -; RV64IM-NEXT: ld a0, 160(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, a0 -; RV64IM-NEXT: ld a0, 144(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld t0, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t4, t0 +; RV64IM-NEXT: ld t4, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, t4 +; RV64IM-NEXT: ld t4, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t4 +; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t4 +; RV64IM-NEXT: ld t4, 160(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t4 +; RV64IM-NEXT: ld t4, 144(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t4 ; RV64IM-NEXT: xor a2, a2, t2 ; RV64IM-NEXT: xor a1, a1, s1 ; RV64IM-NEXT: xor a5, a5, s8 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, a0 -; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, a0 -; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld t2, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t2 +; RV64IM-NEXT: ld t2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t2 ; RV64IM-NEXT: xor a2, a2, s11 ; RV64IM-NEXT: xor a1, a1, s2 ; RV64IM-NEXT: xor a5, a5, s9 ; RV64IM-NEXT: xor t2, t0, t3 ; RV64IM-NEXT: xor t1, t2, t1 -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, a6, a0 -; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded 
Reload -; RV64IM-NEXT: xor a3, a3, a0 +; RV64IM-NEXT: ld t2, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, a6, t2 +; RV64IM-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, t2 ; RV64IM-NEXT: xor a2, a2, ra ; RV64IM-NEXT: xor a1, a1, s3 ; RV64IM-NEXT: xor a5, a5, s10 ; RV64IM-NEXT: xor a7, t1, a7 ; RV64IM-NEXT: xor a4, a6, a4 -; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a3, a3, a0 -; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a0 -; RV64IM-NEXT: xor a1, a1, s4 -; RV64IM-NEXT: lui a6, %hi(.LCPI16_0) -; RV64IM-NEXT: ld a6, %lo(.LCPI16_0)(a6) +; RV64IM-NEXT: ld a6, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a3, a3, a6 +; RV64IM-NEXT: ld a6, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a6 ; RV64IM-NEXT: slli t0, t0, 56 -; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a6, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a6 ; RV64IM-NEXT: xor a0, a5, a0 ; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a5, a7, t1 @@ -9737,24 +9756,24 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: slli a5, a5, 40 ; RV64IM-NEXT: xor a3, a4, a3 ; RV64IM-NEXT: or a4, t0, a5 -; RV64IM-NEXT: lui t0, 4080 -; RV64IM-NEXT: and a5, a3, t0 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a5, a3, a7 ; RV64IM-NEXT: xor a2, a3, a2 ; RV64IM-NEXT: srli a3, a3, 8 ; RV64IM-NEXT: slli a5, a5, 24 ; RV64IM-NEXT: xor a1, a2, a1 -; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a3, a3, a7 +; RV64IM-NEXT: ld a6, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a6 ; RV64IM-NEXT: srli a2, a2, 24 -; RV64IM-NEXT: srliw a7, a1, 24 -; RV64IM-NEXT: and a2, a2, t0 -; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: srliw a6, a1, 24 +; RV64IM-NEXT: and a2, a2, a7 +; RV64IM-NEXT: srli a7, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: slli a7, a7, 32 +; RV64IM-NEXT: slli a6, a6, 32 ; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: and a1, a7, t1 ; RV64IM-NEXT: srli a0, a0, 56 -; RV64IM-NEXT: or a3, a5, a7 +; RV64IM-NEXT: or a3, a5, a6 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: or a3, a4, a3 ; RV64IM-NEXT: or a0, a2, a0 @@ -9771,13 +9790,11 @@ define i8 @clmulh_i8(i8 %a, i8 %b) nounwind { ; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: slli a0, a0, 2 ; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: srli a1, a0, 1 +; RV64IM-NEXT: andi a1, a0, 85 +; RV64IM-NEXT: srli a0, a0, 1 +; RV64IM-NEXT: slli a1, a1, 1 ; RV64IM-NEXT: andi a0, a0, 85 -; RV64IM-NEXT: and a1, a1, a6 -; RV64IM-NEXT: slli a0, a0, 1 -; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: slli a0, a0, 55 -; RV64IM-NEXT: srli a0, a0, 56 +; RV64IM-NEXT: or a0, a0, a1 ; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload @@ -9822,8 +9839,8 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: lui ra, 16 ; RV32IM-NEXT: srli t1, a0, 24 ; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui s10, 61681 -; RV32IM-NEXT: lui t2, 209715 +; RV32IM-NEXT: lui s6, 61681 +; RV32IM-NEXT: lui t3, 209715 ; RV32IM-NEXT: lui a4, 349525 ; RV32IM-NEXT: srli t4, a1, 8 ; RV32IM-NEXT: srli t5, a1, 24 @@ -9831,55 +9848,55 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: li t6, 1 ; RV32IM-NEXT: lui a7, 2 ; RV32IM-NEXT: lui a6, 4 -; RV32IM-NEXT: lui s2, 8 -; RV32IM-NEXT: lui s0, 32 -; RV32IM-NEXT: lui s1, 64 -; RV32IM-NEXT: lui t3, 128 -; 
RV32IM-NEXT: lui s3, 256 -; RV32IM-NEXT: lui s4, 512 -; RV32IM-NEXT: lui s6, 1024 +; RV32IM-NEXT: lui t2, 8 +; RV32IM-NEXT: lui s1, 32 +; RV32IM-NEXT: lui s0, 64 +; RV32IM-NEXT: lui s3, 128 +; RV32IM-NEXT: lui s4, 256 +; RV32IM-NEXT: lui s5, 512 +; RV32IM-NEXT: lui s8, 1024 ; RV32IM-NEXT: lui s7, 2048 -; RV32IM-NEXT: lui s8, 4096 -; RV32IM-NEXT: lui s9, 8192 +; RV32IM-NEXT: lui s9, 4096 +; RV32IM-NEXT: lui s10, 8192 ; RV32IM-NEXT: lui s11, 16384 -; RV32IM-NEXT: addi s5, ra, -256 -; RV32IM-NEXT: sw s5, 88(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and t0, t0, s5 +; RV32IM-NEXT: addi s2, ra, -256 +; RV32IM-NEXT: sw s2, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t0, t0, s2 ; RV32IM-NEXT: or t1, t0, t1 ; RV32IM-NEXT: lui a3, 32768 -; RV32IM-NEXT: and t4, t4, s5 +; RV32IM-NEXT: and t4, t4, s2 ; RV32IM-NEXT: or t5, t4, t5 ; RV32IM-NEXT: lui t0, 65536 -; RV32IM-NEXT: and a0, a0, s5 +; RV32IM-NEXT: and a0, a0, s2 ; RV32IM-NEXT: slli a0, a0, 8 ; RV32IM-NEXT: or a2, a2, a0 ; RV32IM-NEXT: lui t4, 131072 -; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: and a1, a1, s2 ; RV32IM-NEXT: slli a1, a1, 8 ; RV32IM-NEXT: or a0, a5, a1 ; RV32IM-NEXT: lui a5, 262144 -; RV32IM-NEXT: addi s5, s10, -241 -; RV32IM-NEXT: addi s10, t2, 819 +; RV32IM-NEXT: addi s2, s6, -241 +; RV32IM-NEXT: addi s6, t3, 819 ; RV32IM-NEXT: addi a4, a4, 1365 ; RV32IM-NEXT: or a2, a2, t1 ; RV32IM-NEXT: or a0, a0, t5 ; RV32IM-NEXT: srli t1, a2, 4 -; RV32IM-NEXT: and a2, a2, s5 +; RV32IM-NEXT: and a2, a2, s2 ; RV32IM-NEXT: srli t5, a0, 4 -; RV32IM-NEXT: and a0, a0, s5 -; RV32IM-NEXT: and t1, t1, s5 +; RV32IM-NEXT: and a0, a0, s2 +; RV32IM-NEXT: and t1, t1, s2 ; RV32IM-NEXT: slli a2, a2, 4 -; RV32IM-NEXT: and t5, t5, s5 +; RV32IM-NEXT: and t5, t5, s2 ; RV32IM-NEXT: slli a0, a0, 4 ; RV32IM-NEXT: or a2, t1, a2 ; RV32IM-NEXT: or a0, t5, a0 ; RV32IM-NEXT: srli t1, a2, 2 -; RV32IM-NEXT: and a2, a2, s10 +; RV32IM-NEXT: and a2, a2, s6 ; RV32IM-NEXT: srli t5, a0, 2 -; RV32IM-NEXT: and a0, a0, s10 -; RV32IM-NEXT: and t1, t1, s10 +; RV32IM-NEXT: and a0, a0, s6 +; RV32IM-NEXT: and t1, t1, s6 ; RV32IM-NEXT: slli a2, a2, 2 -; RV32IM-NEXT: and t5, t5, s10 +; RV32IM-NEXT: and t5, t5, s6 ; RV32IM-NEXT: slli a0, a0, 2 ; RV32IM-NEXT: or a2, t1, a2 ; RV32IM-NEXT: or a0, t5, a0 @@ -9895,7 +9912,7 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: slli a0, a0, 1 ; RV32IM-NEXT: or a4, t1, a2 ; RV32IM-NEXT: or a0, t5, a0 -; RV32IM-NEXT: andi t2, a0, 2 +; RV32IM-NEXT: andi t3, a0, 2 ; RV32IM-NEXT: andi t5, a0, 1 ; RV32IM-NEXT: and t6, a0, t6 ; RV32IM-NEXT: lui a2, 1 @@ -9905,37 +9922,36 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: sw a2, 76(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a2, a0, a6 ; RV32IM-NEXT: sw a2, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s2 -; RV32IM-NEXT: sw a2, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t2, a0, t2 ; RV32IM-NEXT: and ra, a0, ra -; RV32IM-NEXT: and s0, a0, s0 ; RV32IM-NEXT: and s1, a0, s1 -; RV32IM-NEXT: sw s1, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, t3 +; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 +; RV32IM-NEXT: and s3, a0, s3 +; RV32IM-NEXT: and a2, a0, s4 +; RV32IM-NEXT: sw a2, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s5 ; RV32IM-NEXT: sw a2, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s3 +; RV32IM-NEXT: and a2, a0, s8 ; RV32IM-NEXT: sw a2, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s4, a0, s4 -; RV32IM-NEXT: and a2, a0, s6 -; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill 
; RV32IM-NEXT: and a2, a0, s7 +; RV32IM-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a2, a0, s9 ; RV32IM-NEXT: sw a2, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s8 +; RV32IM-NEXT: and a2, a0, s10 ; RV32IM-NEXT: sw a2, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a2, a0, s9 -; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a2, a0, s11 -; RV32IM-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a2, 40(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a3, a0, a3 -; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a2, a0, t0 -; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a2, 32(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a2, a0, t4 -; RV32IM-NEXT: sw a2, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a5, a0, a5 -; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a5, 24(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a1, a0, a1 -; RV32IM-NEXT: sw a1, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill ; RV32IM-NEXT: andi a1, a0, 4 ; RV32IM-NEXT: andi a2, a0, 8 ; RV32IM-NEXT: andi a3, a0, 16 @@ -9945,130 +9961,126 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV32IM-NEXT: andi t0, a0, 256 ; RV32IM-NEXT: andi t1, a0, 512 ; RV32IM-NEXT: andi a0, a0, 1024 -; RV32IM-NEXT: mul s11, a4, t2 -; RV32IM-NEXT: mul s7, a4, t5 +; RV32IM-NEXT: mul s11, a4, t3 +; RV32IM-NEXT: mul s9, a4, t5 ; RV32IM-NEXT: mul s8, a4, a1 -; RV32IM-NEXT: mul s3, a4, a2 -; RV32IM-NEXT: mul s2, a4, a3 +; RV32IM-NEXT: mul s4, a4, a2 +; RV32IM-NEXT: mul s5, a4, a3 ; RV32IM-NEXT: mul s1, a4, a5 ; RV32IM-NEXT: mul a1, a4, a6 -; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a1, 8(sp) # 4-byte Folded Spill ; RV32IM-NEXT: mul a1, a4, a7 ; RV32IM-NEXT: sw a1, 80(sp) # 4-byte Folded Spill ; RV32IM-NEXT: mul t5, a4, t0 ; RV32IM-NEXT: mul t3, a4, t1 -; RV32IM-NEXT: mul s9, a4, a0 +; RV32IM-NEXT: mul s10, a4, a0 ; RV32IM-NEXT: mul a0, a4, t6 -; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul t1, a4, a0 ; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a7, a4, a0 +; RV32IM-NEXT: mul t6, a4, t2 +; RV32IM-NEXT: mul s7, a4, ra ; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t6, a4, a0 -; RV32IM-NEXT: mul s6, a4, ra -; RV32IM-NEXT: mul a0, a4, s0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a4, s0 ; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a3, a4, s3 +; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a2, a4, a0 ; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a3, a4, a0 +; RV32IM-NEXT: mul a6, a4, a0 ; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a2, a4, a0 -; RV32IM-NEXT: mul a6, a4, s4 -; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul t2, a4, a0 +; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a4, a0 ; 
RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s4, a4, a0 -; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a1, a4, a0 -; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a0, a4, a0 -; RV32IM-NEXT: lw a5, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a5, a4, a5 -; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul t0, a4, t0 -; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw t4, 32(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul t4, a4, t4 -; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul s0, a4, s0 -; RV32IM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 24(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul ra, a4, ra ; RV32IM-NEXT: sw ra, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw ra, 20(sp) # 4-byte Folded Reload ; RV32IM-NEXT: mul a4, a4, ra -; RV32IM-NEXT: xor s7, s7, s11 -; RV32IM-NEXT: xor s3, s8, s3 -; RV32IM-NEXT: xor s1, s2, s1 +; RV32IM-NEXT: xor s9, s9, s11 +; RV32IM-NEXT: xor s4, s8, s4 +; RV32IM-NEXT: xor s1, s5, s1 ; RV32IM-NEXT: xor t3, t5, t3 ; RV32IM-NEXT: xor a7, t1, a7 ; RV32IM-NEXT: xor a2, a3, a2 ; RV32IM-NEXT: xor a0, a1, a0 -; RV32IM-NEXT: xor a1, s7, s3 -; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, s9, s4 +; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, s1, a3 -; RV32IM-NEXT: xor t1, t3, s9 +; RV32IM-NEXT: xor t1, t3, s10 ; RV32IM-NEXT: xor a7, a7, t6 ; RV32IM-NEXT: xor a2, a2, a6 ; RV32IM-NEXT: xor a0, a0, a5 ; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, t1, a3 -; RV32IM-NEXT: xor a5, a7, s6 +; RV32IM-NEXT: xor a5, a7, s7 ; RV32IM-NEXT: xor a2, a2, t2 ; RV32IM-NEXT: xor a0, a0, t0 ; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a1, a1, a6 -; RV32IM-NEXT: lw a6, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a6, 16(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, a3, a6 ; RV32IM-NEXT: lw a6, 72(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a5, a5, a6 -; RV32IM-NEXT: xor a2, a2, s4 +; RV32IM-NEXT: xor a2, a2, s3 ; RV32IM-NEXT: xor a0, a0, t4 ; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a5, a5, a6 ; RV32IM-NEXT: xor a0, a0, s0 ; RV32IM-NEXT: xor a3, a1, a3 ; RV32IM-NEXT: xor a3, a3, a5 -; RV32IM-NEXT: lui a5, 21 +; RV32IM-NEXT: lui a5, 5 +; RV32IM-NEXT: addi a5, a5, 1365 +; RV32IM-NEXT: slli a1, a1, 24 ; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a0, a0, a6 -; RV32IM-NEXT: lui a6, 5 -; RV32IM-NEXT: addi a5, a5, 1364 -; RV32IM-NEXT: addi a6, a6, 1365 -; RV32IM-NEXT: slli a1, a1, 24 ; RV32IM-NEXT: xor a2, a3, a2 ; RV32IM-NEXT: xor a0, a0, a4 -; RV32IM-NEXT: lw a7, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a3, a2, a7 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: and a3, a2, a6 ; RV32IM-NEXT: srli a4, a2, 8 ; RV32IM-NEXT: xor a0, a2, a0 ; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, a7 +; RV32IM-NEXT: and a2, a4, a6 ; RV32IM-NEXT: srli a0, a0, 24 ; RV32IM-NEXT: or a1, a1, a3 ; RV32IM-NEXT: or a0, a2, a0 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: and a0, 
a0, s5 -; RV32IM-NEXT: and a1, a1, s5 +; RV32IM-NEXT: and a0, a0, s2 +; RV32IM-NEXT: and a1, a1, s2 ; RV32IM-NEXT: slli a0, a0, 4 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: and a0, a0, s10 -; RV32IM-NEXT: and a1, a1, s10 +; RV32IM-NEXT: and a0, a0, s6 +; RV32IM-NEXT: and a1, a1, s6 ; RV32IM-NEXT: slli a0, a0, 2 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: and a0, a0, a6 +; RV32IM-NEXT: and a0, a0, a5 ; RV32IM-NEXT: and a1, a1, a5 ; RV32IM-NEXT: slli a0, a0, 1 ; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -10102,14 +10114,14 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: sd s10, 384(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 376(sp) # 8-byte Folded Spill ; RV64IM-NEXT: srli a3, a0, 24 -; RV64IM-NEXT: srli a6, a0, 8 +; RV64IM-NEXT: srli a7, a0, 8 ; RV64IM-NEXT: li s4, 255 ; RV64IM-NEXT: srli a4, a0, 40 -; RV64IM-NEXT: lui s3, 16 +; RV64IM-NEXT: lui s10, 16 ; RV64IM-NEXT: srli t1, a0, 56 ; RV64IM-NEXT: srliw t4, a0, 24 -; RV64IM-NEXT: slli a7, a0, 56 -; RV64IM-NEXT: lui t3, 61681 +; RV64IM-NEXT: slli a5, a0, 56 +; RV64IM-NEXT: lui s3, 61681 ; RV64IM-NEXT: lui t5, 209715 ; RV64IM-NEXT: lui s6, 349525 ; RV64IM-NEXT: srli s9, a1, 24 @@ -10117,7 +10129,7 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: srli ra, a1, 40 ; RV64IM-NEXT: srli t2, a1, 56 ; RV64IM-NEXT: srliw s11, a1, 24 -; RV64IM-NEXT: slli a5, a1, 56 +; RV64IM-NEXT: slli a6, a1, 56 ; RV64IM-NEXT: li t0, 1 ; RV64IM-NEXT: lui s1, 128 ; RV64IM-NEXT: lui s2, 256 @@ -10125,21 +10137,21 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: lui s5, 8192 ; RV64IM-NEXT: lui s7, 4080 ; RV64IM-NEXT: and a2, a3, s7 -; RV64IM-NEXT: slli s10, s4, 24 -; RV64IM-NEXT: addi s8, s3, -256 -; RV64IM-NEXT: and a3, a6, s10 -; RV64IM-NEXT: sd s10, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli t3, s4, 24 +; RV64IM-NEXT: addi s8, s10, -256 +; RV64IM-NEXT: and a3, a7, t3 +; RV64IM-NEXT: sd t3, 368(sp) # 8-byte Folded Spill ; RV64IM-NEXT: or a2, a3, a2 ; RV64IM-NEXT: and a3, a0, s7 ; RV64IM-NEXT: slli t4, t4, 32 -; RV64IM-NEXT: addi s3, t3, -241 +; RV64IM-NEXT: addi s3, s3, -241 ; RV64IM-NEXT: addi s4, t5, 819 ; RV64IM-NEXT: addi s6, s6, 1365 -; RV64IM-NEXT: and a6, s9, s7 +; RV64IM-NEXT: and a7, s9, s7 ; RV64IM-NEXT: and a4, a4, s8 ; RV64IM-NEXT: or a4, a4, t1 ; RV64IM-NEXT: and t1, a1, s7 -; RV64IM-NEXT: slli t3, s11, 32 +; RV64IM-NEXT: slli t5, s11, 32 ; RV64IM-NEXT: slli a3, a3, 24 ; RV64IM-NEXT: or s9, a3, t4 ; RV64IM-NEXT: slli a3, s3, 32 @@ -10149,14 +10161,14 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: slli a3, s6, 32 ; RV64IM-NEXT: add s6, s6, a3 ; RV64IM-NEXT: slli t4, t0, 11 -; RV64IM-NEXT: and a3, s0, s10 -; RV64IM-NEXT: or a3, a3, a6 +; RV64IM-NEXT: and a3, s0, t3 +; RV64IM-NEXT: or a3, a3, a7 ; RV64IM-NEXT: slli s11, t0, 32 -; RV64IM-NEXT: and a6, ra, s8 -; RV64IM-NEXT: or a6, a6, t2 +; RV64IM-NEXT: and a7, ra, s8 +; RV64IM-NEXT: or a7, a7, t2 ; RV64IM-NEXT: slli ra, t0, 33 ; RV64IM-NEXT: slli t1, t1, 24 -; RV64IM-NEXT: or t1, t1, t3 +; RV64IM-NEXT: or t1, t1, t5 ; RV64IM-NEXT: slli s0, t0, 34 ; RV64IM-NEXT: or a2, a2, a4 ; RV64IM-NEXT: slli a4, t0, 35 @@ -10164,15 +10176,15 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: and a0, a0, s8 ; RV64IM-NEXT: sd s8, 344(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli 
a0, a0, 40 -; RV64IM-NEXT: or a0, a7, a0 -; RV64IM-NEXT: slli a7, t0, 36 -; RV64IM-NEXT: or a3, a3, a6 -; RV64IM-NEXT: slli a6, t0, 37 +; RV64IM-NEXT: or a0, a5, a0 +; RV64IM-NEXT: slli a4, t0, 36 +; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a3, a3, a7 +; RV64IM-NEXT: slli a7, t0, 37 ; RV64IM-NEXT: and a1, a1, s8 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a5, a1 -; RV64IM-NEXT: slli a4, t0, 38 -; RV64IM-NEXT: sd a4, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a1, a6, a1 +; RV64IM-NEXT: slli a6, t0, 38 ; RV64IM-NEXT: or a0, a0, s9 ; RV64IM-NEXT: or a1, a1, t1 ; RV64IM-NEXT: or a0, a0, a2 @@ -10241,7 +10253,7 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t0, 40 ; RV64IM-NEXT: and a2, s6, s1 ; RV64IM-NEXT: and a3, s6, s2 @@ -10318,8 +10330,7 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: lui a3, 8 ; RV64IM-NEXT: and a1, s6, a3 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: lui a1, 16 -; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a1, s6, s10 ; RV64IM-NEXT: sd a1, 160(sp) # 8-byte Folded Spill ; RV64IM-NEXT: lui s9, 32 ; RV64IM-NEXT: and a1, s6, s9 @@ -10352,12 +10363,12 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: ld a1, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: sd a1, 304(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, a7 +; RV64IM-NEXT: ld a1, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a1, s6, a1 ; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s6, a6 +; RV64IM-NEXT: and a1, s6, a7 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, s6, a1 +; RV64IM-NEXT: and a1, s6, a6 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and a1, s6, s3 ; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill @@ -10418,7 +10429,7 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 128(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a7, a0, a1 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload @@ -10487,7 +10498,7 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: xor t4, a0, t4 ; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor t3, a0, t3 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor t1, a0, t1 ; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a7, a0, a7 @@ -10527,7 +10538,7 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: xor a2, a2, t2 ; RV64IM-NEXT: xor a1, a1, s1 ; RV64IM-NEXT: xor a5, a5, s8 -; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a0, 296(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor t1, t1, a0 ; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a6, a6, a0 @@ -10552,37 +10563,35 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a2, a2, a0 ; RV64IM-NEXT: xor a1, a1, s4 -; 
RV64IM-NEXT: lui a6, %hi(.LCPI17_0) +; RV64IM-NEXT: lui a6, 5 +; RV64IM-NEXT: addi a6, a6, 1365 +; RV64IM-NEXT: slli t0, t0, 56 ; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload ; RV64IM-NEXT: xor a0, a5, a0 -; RV64IM-NEXT: lui a5, 5 -; RV64IM-NEXT: ld a6, %lo(.LCPI17_0)(a6) -; RV64IM-NEXT: addi a5, a5, 1365 -; RV64IM-NEXT: slli t0, t0, 56 +; RV64IM-NEXT: ld t1, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a5, a7, t1 ; RV64IM-NEXT: xor a4, a7, a4 -; RV64IM-NEXT: ld t2, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a7, a7, t2 -; RV64IM-NEXT: slli a7, a7, 40 +; RV64IM-NEXT: slli a5, a5, 40 ; RV64IM-NEXT: xor a3, a4, a3 -; RV64IM-NEXT: or a4, t0, a7 -; RV64IM-NEXT: lui t1, 4080 -; RV64IM-NEXT: and a7, a3, t1 +; RV64IM-NEXT: or a4, t0, a5 +; RV64IM-NEXT: lui t0, 4080 +; RV64IM-NEXT: and a5, a3, t0 ; RV64IM-NEXT: xor a2, a3, a2 ; RV64IM-NEXT: srli a3, a3, 8 -; RV64IM-NEXT: slli a7, a7, 24 +; RV64IM-NEXT: slli a5, a5, 24 ; RV64IM-NEXT: xor a1, a2, a1 -; RV64IM-NEXT: ld t0, 368(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a3, a3, t0 +; RV64IM-NEXT: ld a7, 368(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and a3, a3, a7 ; RV64IM-NEXT: srli a2, a2, 24 -; RV64IM-NEXT: srliw t0, a1, 24 -; RV64IM-NEXT: and a2, a2, t1 -; RV64IM-NEXT: srli t1, a1, 40 +; RV64IM-NEXT: srliw a7, a1, 24 +; RV64IM-NEXT: and a2, a2, t0 +; RV64IM-NEXT: srli t0, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 -; RV64IM-NEXT: slli t0, t0, 32 +; RV64IM-NEXT: slli a7, a7, 32 ; RV64IM-NEXT: or a2, a3, a2 -; RV64IM-NEXT: and a1, t1, t2 +; RV64IM-NEXT: and a1, t0, t1 ; RV64IM-NEXT: srli a0, a0, 56 -; RV64IM-NEXT: or a3, a7, t0 +; RV64IM-NEXT: or a3, a5, a7 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: or a3, a4, a3 ; RV64IM-NEXT: or a0, a2, a0 @@ -10600,12 +10609,10 @@ define i16 @clmulh_i16(i16 %a, i16 %b) nounwind { ; RV64IM-NEXT: slli a0, a0, 2 ; RV64IM-NEXT: or a0, a1, a0 ; RV64IM-NEXT: srli a1, a0, 1 -; RV64IM-NEXT: and a0, a0, a5 +; RV64IM-NEXT: and a0, a0, a6 ; RV64IM-NEXT: and a1, a1, a6 ; RV64IM-NEXT: slli a0, a0, 1 ; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: slli a0, a0, 47 -; RV64IM-NEXT: srli a0, a0, 48 ; RV64IM-NEXT: ld ra, 472(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 464(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 456(sp) # 8-byte Folded Reload @@ -10650,253 +10657,250 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV32IM-NEXT: lui a3, 16 ; RV32IM-NEXT: srli t1, a0, 24 ; RV32IM-NEXT: slli a2, a0, 24 -; RV32IM-NEXT: lui s1, 61681 -; RV32IM-NEXT: lui s3, 209715 -; RV32IM-NEXT: lui a6, 349525 +; RV32IM-NEXT: lui t3, 61681 +; RV32IM-NEXT: lui t5, 209715 +; RV32IM-NEXT: lui t6, 349525 ; RV32IM-NEXT: srli t4, a1, 8 -; RV32IM-NEXT: srli t6, a1, 24 -; RV32IM-NEXT: slli a4, a1, 24 -; RV32IM-NEXT: li t3, 1 -; RV32IM-NEXT: lui s11, 2 +; RV32IM-NEXT: srli a4, a1, 24 +; RV32IM-NEXT: slli a5, a1, 24 +; RV32IM-NEXT: li s7, 1 ; RV32IM-NEXT: lui t2, 4 -; RV32IM-NEXT: lui s10, 8 -; RV32IM-NEXT: lui t5, 32 -; RV32IM-NEXT: lui s0, 64 -; RV32IM-NEXT: lui s2, 128 +; RV32IM-NEXT: lui s0, 8 +; RV32IM-NEXT: lui s1, 32 +; RV32IM-NEXT: lui s2, 64 +; RV32IM-NEXT: lui s3, 128 ; RV32IM-NEXT: lui s4, 256 -; RV32IM-NEXT: lui s5, 512 -; RV32IM-NEXT: lui s6, 1024 -; RV32IM-NEXT: lui s7, 2048 -; RV32IM-NEXT: lui s8, 4096 -; RV32IM-NEXT: lui s9, 8192 +; RV32IM-NEXT: lui s8, 512 +; RV32IM-NEXT: lui a7, 1024 +; RV32IM-NEXT: lui s9, 2048 +; RV32IM-NEXT: lui s10, 4096 +; RV32IM-NEXT: lui s11, 8192 ; RV32IM-NEXT: lui ra, 16384 -; RV32IM-NEXT: addi a3, a3, -256 -; RV32IM-NEXT: lui a5, 16 -; RV32IM-NEXT: and t0, t0, a3 +; RV32IM-NEXT: addi 
s5, a3, -256 +; RV32IM-NEXT: and t0, t0, s5 ; RV32IM-NEXT: or t1, t0, t1 -; RV32IM-NEXT: lui a7, 32768 -; RV32IM-NEXT: and t4, t4, a3 -; RV32IM-NEXT: or t6, t4, t6 +; RV32IM-NEXT: lui a6, 32768 +; RV32IM-NEXT: and t4, t4, s5 +; RV32IM-NEXT: or a4, t4, a4 ; RV32IM-NEXT: lui t0, 65536 -; RV32IM-NEXT: and a0, a0, a3 -; RV32IM-NEXT: mv t4, a3 -; RV32IM-NEXT: sw a3, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a0, a0, s5 ; RV32IM-NEXT: slli a0, a0, 8 -; RV32IM-NEXT: or a2, a2, a0 -; RV32IM-NEXT: lui a3, 131072 -; RV32IM-NEXT: and a1, a1, t4 +; RV32IM-NEXT: or a0, a2, a0 +; RV32IM-NEXT: lui a2, 131072 +; RV32IM-NEXT: and a1, a1, s5 ; RV32IM-NEXT: slli a1, a1, 8 -; RV32IM-NEXT: or a0, a4, a1 +; RV32IM-NEXT: or t4, a5, a1 ; RV32IM-NEXT: lui a1, 262144 -; RV32IM-NEXT: addi s1, s1, -241 -; RV32IM-NEXT: addi s3, s3, 819 -; RV32IM-NEXT: or a2, a2, t1 -; RV32IM-NEXT: addi a4, a6, 1365 -; RV32IM-NEXT: sw a4, 84(sp) # 4-byte Folded Spill -; RV32IM-NEXT: or a0, a0, t6 -; RV32IM-NEXT: srli a6, a2, 4 -; RV32IM-NEXT: and a2, a2, s1 -; RV32IM-NEXT: and a6, a6, s1 -; RV32IM-NEXT: slli a2, a2, 4 -; RV32IM-NEXT: or a2, a6, a2 -; RV32IM-NEXT: srli a6, a0, 4 -; RV32IM-NEXT: and a0, a0, s1 -; RV32IM-NEXT: and a6, a6, s1 +; RV32IM-NEXT: or a0, a0, t1 +; RV32IM-NEXT: lui a5, 524288 +; RV32IM-NEXT: addi t3, t3, -241 +; RV32IM-NEXT: addi t5, t5, 819 +; RV32IM-NEXT: addi t6, t6, 1365 +; RV32IM-NEXT: slli s7, s7, 11 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: srli t4, a0, 4 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and t4, t4, t3 ; RV32IM-NEXT: slli a0, a0, 4 -; RV32IM-NEXT: or a0, a6, a0 -; RV32IM-NEXT: srli a6, a2, 2 -; RV32IM-NEXT: and a2, a2, s3 -; RV32IM-NEXT: and a6, a6, s3 -; RV32IM-NEXT: slli a2, a2, 2 -; RV32IM-NEXT: or a2, a6, a2 -; RV32IM-NEXT: srli a6, a0, 2 -; RV32IM-NEXT: and a0, a0, s3 -; RV32IM-NEXT: and a6, a6, s3 +; RV32IM-NEXT: or a0, t4, a0 +; RV32IM-NEXT: srli t4, a4, 4 +; RV32IM-NEXT: and a4, a4, t3 +; RV32IM-NEXT: and t4, t4, t3 +; RV32IM-NEXT: slli a4, a4, 4 +; RV32IM-NEXT: or a4, t4, a4 +; RV32IM-NEXT: srli t4, a0, 2 +; RV32IM-NEXT: and a0, a0, t5 +; RV32IM-NEXT: and t4, t4, t5 ; RV32IM-NEXT: slli a0, a0, 2 -; RV32IM-NEXT: or a0, a6, a0 -; RV32IM-NEXT: srli a6, a2, 1 -; RV32IM-NEXT: and a2, a2, a4 -; RV32IM-NEXT: and a6, a6, a4 -; RV32IM-NEXT: slli a2, a2, 1 -; RV32IM-NEXT: or a6, a6, a2 -; RV32IM-NEXT: srli a2, a0, 1 -; RV32IM-NEXT: and a0, a0, a4 -; RV32IM-NEXT: and a2, a2, a4 +; RV32IM-NEXT: or a0, t4, a0 +; RV32IM-NEXT: srli t4, a4, 2 +; RV32IM-NEXT: and a4, a4, t5 +; RV32IM-NEXT: and t4, t4, t5 +; RV32IM-NEXT: slli a4, a4, 2 +; RV32IM-NEXT: or t4, t4, a4 +; RV32IM-NEXT: srli a4, a0, 1 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: and a4, a4, t6 ; RV32IM-NEXT: slli a0, a0, 1 -; RV32IM-NEXT: or a0, a2, a0 -; RV32IM-NEXT: lui a2, 524288 -; RV32IM-NEXT: slli t3, t3, 11 -; RV32IM-NEXT: and t3, a0, t3 -; RV32IM-NEXT: lui a4, 1 -; RV32IM-NEXT: and t4, a0, a4 -; RV32IM-NEXT: and s11, a0, s11 -; RV32IM-NEXT: and a4, a0, t2 -; RV32IM-NEXT: sw a4, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s10 -; RV32IM-NEXT: sw a4, 72(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a5, a0, a5 -; RV32IM-NEXT: sw a5, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, t5 -; RV32IM-NEXT: sw a4, 64(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s0, a0, s0 -; RV32IM-NEXT: and a4, a0, s2 -; RV32IM-NEXT: sw a4, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and s4, a0, s4 -; RV32IM-NEXT: and a4, a0, s5 -; RV32IM-NEXT: sw a4, 56(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s6 -; RV32IM-NEXT: 
sw a4, 52(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s7 -; RV32IM-NEXT: sw a4, 48(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s8 -; RV32IM-NEXT: sw a4, 44(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, s9 -; RV32IM-NEXT: sw a4, 40(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, ra -; RV32IM-NEXT: sw a4, 36(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, a7 -; RV32IM-NEXT: sw a4, 32(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a4, a0, t0 -; RV32IM-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: or a4, a4, a0 +; RV32IM-NEXT: srli a0, t4, 1 +; RV32IM-NEXT: and t4, t4, t6 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: slli t4, t4, 1 +; RV32IM-NEXT: or a0, a0, t4 +; RV32IM-NEXT: andi t4, a0, 2 +; RV32IM-NEXT: and s6, a0, s7 +; RV32IM-NEXT: lui t1, 1 +; RV32IM-NEXT: and t1, a0, t1 +; RV32IM-NEXT: sw t1, 84(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lui t1, 2 +; RV32IM-NEXT: and t1, a0, t1 +; RV32IM-NEXT: sw t1, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and t1, a0, t2 +; RV32IM-NEXT: sw t1, 76(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s0, a0, s0 ; RV32IM-NEXT: and a3, a0, a3 -; RV32IM-NEXT: sw a3, 24(sp) # 4-byte Folded Spill -; RV32IM-NEXT: and a1, a0, a1 -; RV32IM-NEXT: sw a1, 20(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw a3, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s1, a0, s1 +; RV32IM-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s2 +; RV32IM-NEXT: sw a3, 64(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s3, a0, s3 +; RV32IM-NEXT: and a3, a0, s4 +; RV32IM-NEXT: sw a3, 60(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s8 +; RV32IM-NEXT: sw a3, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a7 +; RV32IM-NEXT: sw a3, 52(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and s9, a0, s9 +; RV32IM-NEXT: and a3, a0, s10 +; RV32IM-NEXT: sw a3, 48(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, s11 +; RV32IM-NEXT: sw a3, 44(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, ra +; RV32IM-NEXT: sw a3, 40(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, a6 +; RV32IM-NEXT: sw a3, 36(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a3, a0, t0 +; RV32IM-NEXT: sw a3, 32(sp) # 4-byte Folded Spill ; RV32IM-NEXT: and a2, a0, a2 -; RV32IM-NEXT: sw a2, 16(sp) # 4-byte Folded Spill -; RV32IM-NEXT: andi ra, a0, 2 +; RV32IM-NEXT: sw a2, 28(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a1, a0, a1 +; RV32IM-NEXT: sw a1, 24(sp) # 4-byte Folded Spill +; RV32IM-NEXT: and a5, a0, a5 +; RV32IM-NEXT: sw a5, 20(sp) # 4-byte Folded Spill ; RV32IM-NEXT: andi a1, a0, 1 ; RV32IM-NEXT: andi a2, a0, 4 ; RV32IM-NEXT: andi a3, a0, 8 -; RV32IM-NEXT: andi a4, a0, 16 -; RV32IM-NEXT: andi a5, a0, 32 +; RV32IM-NEXT: andi a5, a0, 16 +; RV32IM-NEXT: andi a6, a0, 32 ; RV32IM-NEXT: andi a7, a0, 64 ; RV32IM-NEXT: andi t0, a0, 128 ; RV32IM-NEXT: andi t1, a0, 256 ; RV32IM-NEXT: andi t2, a0, 512 ; RV32IM-NEXT: andi a0, a0, 1024 -; RV32IM-NEXT: mul ra, a6, ra -; RV32IM-NEXT: mul s10, a6, a1 -; RV32IM-NEXT: mul s9, a6, a2 -; RV32IM-NEXT: mul s5, a6, a3 -; RV32IM-NEXT: mul s6, a6, a4 -; RV32IM-NEXT: mul s2, a6, a5 -; RV32IM-NEXT: mul a1, a6, a7 -; RV32IM-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a1, a6, t0 -; RV32IM-NEXT: sw a1, 76(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul t6, a6, t1 -; RV32IM-NEXT: mul t2, a6, t2 -; RV32IM-NEXT: mul s7, a6, a0 -; RV32IM-NEXT: mul a0, a6, t3 -; RV32IM-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a6, t4 -; RV32IM-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; 
RV32IM-NEXT: mul t1, a6, s11 +; RV32IM-NEXT: mul t4, a4, t4 +; RV32IM-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul ra, a4, a1 +; RV32IM-NEXT: mul s11, a4, a2 +; RV32IM-NEXT: mul s8, a4, a3 +; RV32IM-NEXT: mul s7, a4, a5 +; RV32IM-NEXT: mul s4, a4, a6 +; RV32IM-NEXT: mul a1, a4, a7 +; RV32IM-NEXT: sw a1, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a1, a4, t0 +; RV32IM-NEXT: sw a1, 88(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul s2, a4, t1 +; RV32IM-NEXT: mul t2, a4, t2 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a4, s6 +; RV32IM-NEXT: sw a0, 16(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a0, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 84(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a7, a6, a0 +; RV32IM-NEXT: mul t1, a4, a0 +; RV32IM-NEXT: lw a0, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a7, a4, a0 +; RV32IM-NEXT: mul s1, a4, s0 ; RV32IM-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t5, a6, a0 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s8, a6, a0 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 76(sp) # 4-byte Folded Spill ; RV32IM-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a6, a0 -; RV32IM-NEXT: sw a0, 68(sp) # 4-byte Folded Spill -; RV32IM-NEXT: mul a0, a6, s0 -; RV32IM-NEXT: sw a0, 72(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: sw a0, 80(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mul a3, a4, s3 ; RV32IM-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a3, a6, a0 -; RV32IM-NEXT: mul a2, a6, s4 +; RV32IM-NEXT: mul a2, a4, a0 ; RV32IM-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a5, a6, a0 +; RV32IM-NEXT: mul a6, a4, a0 ; RV32IM-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t3, a6, a0 +; RV32IM-NEXT: mul t4, a4, a0 +; RV32IM-NEXT: mul s6, a4, s9 ; RV32IM-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s4, a6, a0 +; RV32IM-NEXT: mul a1, a4, a0 ; RV32IM-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a1, a6, a0 -; RV32IM-NEXT: lw a0, 40(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a0, a6, a0 -; RV32IM-NEXT: lw a4, 36(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a4, a6, a4 -; RV32IM-NEXT: lw t0, 32(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t0, a6, t0 -; RV32IM-NEXT: lw t4, 28(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul t4, a6, t4 -; RV32IM-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s0, a6, s0 -; RV32IM-NEXT: lw s11, 20(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul s11, a6, s11 -; RV32IM-NEXT: sw s11, 80(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s11, 16(sp) # 4-byte Folded Reload -; RV32IM-NEXT: mul a6, a6, s11 -; RV32IM-NEXT: xor s10, s10, ra -; RV32IM-NEXT: xor s5, s9, s5 -; RV32IM-NEXT: xor s2, s6, s2 -; RV32IM-NEXT: xor t2, t6, t2 +; RV32IM-NEXT: mul a0, a4, a0 +; RV32IM-NEXT: lw a5, 40(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul a5, a4, a5 +; RV32IM-NEXT: lw t0, 36(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul t0, a4, t0 +; RV32IM-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s0, a4, s0 +; RV32IM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s3, a4, s3 +; RV32IM-NEXT: lw s9, 24(sp) # 4-byte Folded Reload +; RV32IM-NEXT: mul s9, a4, s9 +; RV32IM-NEXT: lw s10, 20(sp) # 4-byte 
Folded Reload +; RV32IM-NEXT: mul a4, a4, s10 +; RV32IM-NEXT: lw s10, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor ra, ra, s10 +; RV32IM-NEXT: xor s8, s11, s8 +; RV32IM-NEXT: xor s4, s7, s4 +; RV32IM-NEXT: xor t2, s2, t2 ; RV32IM-NEXT: xor a7, t1, a7 ; RV32IM-NEXT: xor a2, a3, a2 ; RV32IM-NEXT: xor a0, a1, a0 -; RV32IM-NEXT: xor a1, s10, s5 -; RV32IM-NEXT: lw a3, 4(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, s2, a3 -; RV32IM-NEXT: xor t1, t2, s7 -; RV32IM-NEXT: xor a7, a7, t5 -; RV32IM-NEXT: xor a2, a2, a5 -; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: xor a1, ra, s8 +; RV32IM-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, s4, a3 +; RV32IM-NEXT: lw t1, 4(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor t1, t2, t1 +; RV32IM-NEXT: xor a7, a7, s1 +; RV32IM-NEXT: xor a2, a2, a6 +; RV32IM-NEXT: xor a0, a0, a5 ; RV32IM-NEXT: xor a1, a1, a3 -; RV32IM-NEXT: lw a3, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw a3, 16(sp) # 4-byte Folded Reload ; RV32IM-NEXT: xor a3, t1, a3 -; RV32IM-NEXT: xor a4, a7, s8 -; RV32IM-NEXT: xor a2, a2, t3 -; RV32IM-NEXT: xor a0, a0, t0 -; RV32IM-NEXT: lw a5, 76(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a1, a1, a5 -; RV32IM-NEXT: lw a5, 12(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a3, a3, a5 -; RV32IM-NEXT: lw a5, 68(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, a5 -; RV32IM-NEXT: xor a2, a2, s4 -; RV32IM-NEXT: xor a0, a0, t4 ; RV32IM-NEXT: lw a5, 72(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a4, a4, a5 +; RV32IM-NEXT: xor a5, a7, a5 +; RV32IM-NEXT: xor a2, a2, t4 +; RV32IM-NEXT: xor a0, a0, t0 +; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a1, a1, a6 +; RV32IM-NEXT: lw a6, 84(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a3, a3, a6 +; RV32IM-NEXT: lw a6, 76(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a2, a2, s6 ; RV32IM-NEXT: xor a0, a0, s0 -; RV32IM-NEXT: lui a5, 349525 -; RV32IM-NEXT: addi a5, a5, 1364 +; RV32IM-NEXT: lw a6, 80(sp) # 4-byte Folded Reload +; RV32IM-NEXT: xor a5, a5, a6 +; RV32IM-NEXT: xor a0, a0, s3 ; RV32IM-NEXT: xor a3, a1, a3 ; RV32IM-NEXT: slli a1, a1, 24 -; RV32IM-NEXT: xor a3, a3, a4 -; RV32IM-NEXT: lw a4, 80(sp) # 4-byte Folded Reload -; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: xor a3, a3, a5 +; RV32IM-NEXT: xor a0, a0, s9 ; RV32IM-NEXT: xor a2, a3, a2 -; RV32IM-NEXT: xor a0, a0, a6 -; RV32IM-NEXT: lw a6, 88(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a3, a2, a6 +; RV32IM-NEXT: xor a0, a0, a4 +; RV32IM-NEXT: and a3, a2, s5 ; RV32IM-NEXT: srli a4, a2, 8 ; RV32IM-NEXT: xor a0, a2, a0 ; RV32IM-NEXT: slli a3, a3, 8 -; RV32IM-NEXT: and a2, a4, a6 +; RV32IM-NEXT: and a2, a4, s5 ; RV32IM-NEXT: srli a0, a0, 24 ; RV32IM-NEXT: or a1, a1, a3 ; RV32IM-NEXT: or a0, a2, a0 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 4 -; RV32IM-NEXT: and a0, a0, s1 -; RV32IM-NEXT: and a1, a1, s1 +; RV32IM-NEXT: and a0, a0, t3 +; RV32IM-NEXT: and a1, a1, t3 ; RV32IM-NEXT: slli a0, a0, 4 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 2 -; RV32IM-NEXT: and a0, a0, s3 -; RV32IM-NEXT: and a1, a1, s3 +; RV32IM-NEXT: and a0, a0, t5 +; RV32IM-NEXT: and a1, a1, t5 ; RV32IM-NEXT: slli a0, a0, 2 ; RV32IM-NEXT: or a0, a1, a0 ; RV32IM-NEXT: srli a1, a0, 1 -; RV32IM-NEXT: lw a2, 84(sp) # 4-byte Folded Reload -; RV32IM-NEXT: and a0, a0, a2 -; RV32IM-NEXT: and a1, a1, a5 +; RV32IM-NEXT: and a0, a0, t6 +; RV32IM-NEXT: and a1, a1, t6 ; RV32IM-NEXT: slli a0, a0, 1 ; RV32IM-NEXT: or a0, a1, a0 -; RV32IM-NEXT: srli a0, a0, 1 ; RV32IM-NEXT: lw ra, 140(sp) 
# 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -10930,73 +10934,74 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: sd s10, 416(sp) # 8-byte Folded Spill ; RV64IM-NEXT: sd s11, 408(sp) # 8-byte Folded Spill ; RV64IM-NEXT: srli a3, a0, 24 -; RV64IM-NEXT: srli t0, a0, 8 +; RV64IM-NEXT: srli a7, a0, 8 ; RV64IM-NEXT: li s1, 255 -; RV64IM-NEXT: srli a5, a0, 40 +; RV64IM-NEXT: srli a6, a0, 40 ; RV64IM-NEXT: lui a4, 16 ; RV64IM-NEXT: srli t2, a0, 56 ; RV64IM-NEXT: srliw t3, a0, 24 ; RV64IM-NEXT: slli a2, a0, 56 ; RV64IM-NEXT: lui t4, 61681 -; RV64IM-NEXT: lui s0, 209715 +; RV64IM-NEXT: lui t6, 209715 ; RV64IM-NEXT: lui s9, 349525 ; RV64IM-NEXT: srli s7, a1, 24 ; RV64IM-NEXT: srli s5, a1, 8 ; RV64IM-NEXT: srli t5, a1, 40 -; RV64IM-NEXT: srli a7, a1, 56 +; RV64IM-NEXT: srli t0, a1, 56 ; RV64IM-NEXT: srliw ra, a1, 24 -; RV64IM-NEXT: slli a6, a1, 56 +; RV64IM-NEXT: slli a5, a1, 56 ; RV64IM-NEXT: li t1, 1 -; RV64IM-NEXT: lui s11, 128 +; RV64IM-NEXT: lui s10, 128 ; RV64IM-NEXT: lui s2, 256 ; RV64IM-NEXT: lui s3, 4096 -; RV64IM-NEXT: lui t6, 8192 +; RV64IM-NEXT: lui s0, 8192 ; RV64IM-NEXT: lui s8, 4080 ; RV64IM-NEXT: and a3, a3, s8 ; RV64IM-NEXT: slli s1, s1, 24 -; RV64IM-NEXT: addi s10, a4, -256 -; RV64IM-NEXT: and t0, t0, s1 +; RV64IM-NEXT: addi s11, a4, -256 +; RV64IM-NEXT: and a7, a7, s1 ; RV64IM-NEXT: sd s1, 400(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a3, t0, a3 -; RV64IM-NEXT: and t0, a0, s8 +; RV64IM-NEXT: or a3, a7, a3 +; RV64IM-NEXT: and a7, a0, s8 ; RV64IM-NEXT: slli t3, t3, 32 ; RV64IM-NEXT: addi s4, t4, -241 -; RV64IM-NEXT: addi s6, s0, 819 +; RV64IM-NEXT: addi s6, t6, 819 ; RV64IM-NEXT: addi a4, s9, 1365 ; RV64IM-NEXT: and t4, s7, s8 -; RV64IM-NEXT: and a5, a5, s10 -; RV64IM-NEXT: or a5, a5, t2 +; RV64IM-NEXT: and a6, a6, s11 +; RV64IM-NEXT: or a6, a6, t2 ; RV64IM-NEXT: and t2, a1, s8 -; RV64IM-NEXT: slli s0, ra, 32 -; RV64IM-NEXT: slli t0, t0, 24 -; RV64IM-NEXT: or s9, t0, t3 -; RV64IM-NEXT: slli t0, s4, 32 -; RV64IM-NEXT: add s4, s4, t0 -; RV64IM-NEXT: slli t0, s6, 32 -; RV64IM-NEXT: add s6, s6, t0 +; RV64IM-NEXT: slli t6, ra, 32 +; RV64IM-NEXT: slli a7, a7, 24 +; RV64IM-NEXT: or s9, a7, t3 +; RV64IM-NEXT: slli a7, s4, 32 +; RV64IM-NEXT: add s4, s4, a7 +; RV64IM-NEXT: slli a7, s6, 32 +; RV64IM-NEXT: add s6, s6, a7 ; RV64IM-NEXT: slli s7, t1, 11 -; RV64IM-NEXT: and t0, s5, s1 -; RV64IM-NEXT: or t0, t0, t4 +; RV64IM-NEXT: and a7, s5, s1 +; RV64IM-NEXT: or a7, a7, t4 ; RV64IM-NEXT: slli t4, t1, 32 -; RV64IM-NEXT: and t3, t5, s10 -; RV64IM-NEXT: or a7, t3, a7 +; RV64IM-NEXT: and t3, t5, s11 +; RV64IM-NEXT: or t0, t3, t0 ; RV64IM-NEXT: slli ra, t1, 33 ; RV64IM-NEXT: slli t2, t2, 24 -; RV64IM-NEXT: or t2, t2, s0 -; RV64IM-NEXT: slli s0, t1, 34 -; RV64IM-NEXT: or a3, a3, a5 -; RV64IM-NEXT: slli s1, t1, 35 -; RV64IM-NEXT: sd s10, 368(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a0, a0, s10 +; RV64IM-NEXT: or t2, t2, t6 +; RV64IM-NEXT: slli s1, t1, 34 +; RV64IM-NEXT: or a3, a3, a6 +; RV64IM-NEXT: slli a6, t1, 35 +; RV64IM-NEXT: sd a6, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd s11, 368(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a0, a0, s11 ; RV64IM-NEXT: slli a0, a0, 40 ; RV64IM-NEXT: or a0, a2, a0 ; RV64IM-NEXT: slli a2, t1, 36 -; RV64IM-NEXT: sd a2, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: or a2, t0, a7 +; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: or a2, a7, t0 ; RV64IM-NEXT: slli a7, t1, 37 -; RV64IM-NEXT: and a1, a1, s10 +; RV64IM-NEXT: and a1, a1, 
s11 ; RV64IM-NEXT: slli a1, a1, 40 -; RV64IM-NEXT: or a1, a6, a1 +; RV64IM-NEXT: or a1, a5, a1 ; RV64IM-NEXT: sd a4, 392(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a5, a4, 32 ; RV64IM-NEXT: add a5, a4, a5 @@ -11068,57 +11073,56 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 40 -; RV64IM-NEXT: and a2, s5, s11 +; RV64IM-NEXT: and a2, s5, s10 ; RV64IM-NEXT: and a3, s5, s2 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 312(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 304(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 41 ; RV64IM-NEXT: and a3, s5, s3 -; RV64IM-NEXT: and a4, s5, t6 +; RV64IM-NEXT: and a4, s5, s0 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: xor a3, a3, a4 -; RV64IM-NEXT: sd a3, 304(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a3, 296(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a3, t1, 48 ; RV64IM-NEXT: and a4, s5, t4 ; RV64IM-NEXT: and a5, s5, ra ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a5, a0, a5 ; RV64IM-NEXT: xor a4, a4, a5 -; RV64IM-NEXT: sd a4, 296(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a4, 288(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, t1, 49 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: and a2, s5, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 288(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 280(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 56 ; RV64IM-NEXT: and a2, s5, a3 ; RV64IM-NEXT: and a3, s5, a4 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: mul a3, a0, a3 ; RV64IM-NEXT: xor a2, a2, a3 -; RV64IM-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a2, 272(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a2, t1, 57 ; RV64IM-NEXT: and a1, s5, a1 ; RV64IM-NEXT: and a2, s5, a2 ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: mul a2, a0, a2 ; RV64IM-NEXT: xor a1, a1, a2 -; RV64IM-NEXT: sd a1, 272(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli ra, t1, 39 ; RV64IM-NEXT: slli a2, t1, 42 ; RV64IM-NEXT: slli a4, t1, 43 -; RV64IM-NEXT: slli s2, t1, 44 -; RV64IM-NEXT: slli s3, t1, 45 -; RV64IM-NEXT: slli s6, t1, 46 -; RV64IM-NEXT: slli a1, t1, 47 -; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: slli s0, t1, 44 +; RV64IM-NEXT: slli s2, t1, 45 +; RV64IM-NEXT: slli s3, t1, 46 +; RV64IM-NEXT: slli s6, t1, 47 ; RV64IM-NEXT: slli a1, t1, 50 ; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a1, t1, 51 @@ -11177,43 +11181,43 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: and a5, s5, t5 ; RV64IM-NEXT: lui t6, 262144 ; RV64IM-NEXT: and a6, s5, t6 -; RV64IM-NEXT: and s11, s5, s0 -; RV64IM-NEXT: and t5, s5, s1 -; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and s11, s5, s1 +; RV64IM-NEXT: ld a1, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: and t5, s5, a1 +; RV64IM-NEXT: ld a1, 312(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and t6, s5, a1 ; RV64IM-NEXT: and a1, s5, a7 -; RV64IM-NEXT: sd a1, 320(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s4 ; RV64IM-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s4 +; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill ; RV64IM-NEXT: and ra, s5, ra ; 
RV64IM-NEXT: and a1, s5, a2 -; RV64IM-NEXT: sd a1, 88(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, a4 ; RV64IM-NEXT: sd a1, 80(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s2 +; RV64IM-NEXT: and a1, s5, a4 ; RV64IM-NEXT: sd a1, 72(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s3 +; RV64IM-NEXT: and a1, s5, s0 ; RV64IM-NEXT: sd a1, 64(sp) # 8-byte Folded Spill -; RV64IM-NEXT: and a1, s5, s6 +; RV64IM-NEXT: and a1, s5, s2 ; RV64IM-NEXT: sd a1, 56(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: and a1, s5, a1 +; RV64IM-NEXT: and a1, s5, s3 ; RV64IM-NEXT: sd a1, 48(sp) # 8-byte Folded Spill +; RV64IM-NEXT: and a1, s5, s6 +; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 256(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 40(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 248(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 32(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 240(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 24(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 232(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 16(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 224(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a1, s5, a1 -; RV64IM-NEXT: sd a1, 8(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 0(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 216(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and s4, s5, a1 ; RV64IM-NEXT: ld a1, 208(sp) # 8-byte Folded Reload @@ -11233,13 +11237,13 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: srli t3, s5, 63 ; RV64IM-NEXT: mul s2, a0, a1 ; RV64IM-NEXT: mul a1, a0, a2 -; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s0, a0, a3 ; RV64IM-NEXT: mul a1, a0, t1 ; RV64IM-NEXT: sd a1, 152(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 200(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 240(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 184(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t4, a0, a1 ; RV64IM-NEXT: ld a1, 176(sp) # 8-byte Folded Reload @@ -11247,16 +11251,16 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: sd a1, 128(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 168(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 160(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 328(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 144(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t1, a0, a1 ; RV64IM-NEXT: ld a1, 136(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul s3, a0, a1 ; RV64IM-NEXT: mul a1, a0, t0 -; RV64IM-NEXT: sd a1, 192(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a7, a0, t2 ; RV64IM-NEXT: ld a1, 120(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul s1, a0, a1 @@ -11264,46 +11268,47 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: mul a1, a0, 
a1 ; RV64IM-NEXT: sd a1, 176(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a5 -; RV64IM-NEXT: sd a1, 200(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 208(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, a6 -; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 312(sp) # 8-byte Folded Spill ; RV64IM-NEXT: slli a4, a4, 31 ; RV64IM-NEXT: mul a5, a0, s11 ; RV64IM-NEXT: mul t2, a0, t5 ; RV64IM-NEXT: mul s11, a0, t6 -; RV64IM-NEXT: ld a1, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 ; RV64IM-NEXT: sd a1, 168(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 232(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul a1, a0, ra -; RV64IM-NEXT: sd a1, 248(sp) # 8-byte Folded Spill -; RV64IM-NEXT: ld a1, 88(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a2, a0, a1 +; RV64IM-NEXT: sd a1, 256(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 80(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a6, a0, a1 +; RV64IM-NEXT: mul a2, a0, a1 ; RV64IM-NEXT: ld a1, 72(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul t5, a0, a1 +; RV64IM-NEXT: mul a6, a0, a1 ; RV64IM-NEXT: ld a1, 64(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul s5, a0, a1 +; RV64IM-NEXT: mul t5, a0, a1 ; RV64IM-NEXT: ld a1, 56(sp) # 8-byte Folded Reload -; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; RV64IM-NEXT: mul s5, a0, a1 ; RV64IM-NEXT: ld a1, 48(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; RV64IM-NEXT: sd a1, 184(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld a1, 40(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a1, a0, a1 -; RV64IM-NEXT: ld a3, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: sd a1, 224(sp) # 8-byte Folded Spill +; RV64IM-NEXT: ld a1, 32(sp) # 8-byte Folded Reload +; RV64IM-NEXT: mul a1, a0, a1 +; RV64IM-NEXT: ld a3, 24(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul a3, a0, a3 -; RV64IM-NEXT: ld t0, 24(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t0, 16(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t0, a0, t0 -; RV64IM-NEXT: ld t6, 16(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld t6, 8(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul t6, a0, t6 -; RV64IM-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld ra, 0(sp) # 8-byte Folded Reload ; RV64IM-NEXT: mul ra, a0, ra ; RV64IM-NEXT: mul s4, a0, s4 +; RV64IM-NEXT: sd s4, 192(sp) # 8-byte Folded Spill ; RV64IM-NEXT: mul s6, a0, s6 ; RV64IM-NEXT: mul s7, a0, s7 ; RV64IM-NEXT: mul s8, a0, s8 @@ -11312,83 +11317,80 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: slli t3, t3, 63 ; RV64IM-NEXT: mul a4, a0, a4 ; RV64IM-NEXT: mul a0, a0, t3 -; RV64IM-NEXT: sd a0, 320(sp) # 8-byte Folded Spill ; RV64IM-NEXT: ld t3, 360(sp) # 8-byte Folded Reload -; RV64IM-NEXT: ld a0, 352(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t3, a0 -; RV64IM-NEXT: ld a0, 344(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s2, a0, s2 -; RV64IM-NEXT: ld a0, 336(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, a0, s0 -; RV64IM-NEXT: ld a0, 328(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, a0, t4 -; RV64IM-NEXT: ld a0, 312(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, a0, t1 -; RV64IM-NEXT: ld a0, 304(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a0, a7 -; RV64IM-NEXT: ld a0, 296(sp) # 8-byte 
Folded Reload -; RV64IM-NEXT: xor a5, a0, a5 -; RV64IM-NEXT: ld a0, 288(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a0, a2 -; RV64IM-NEXT: ld a0, 280(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a1, a0, a1 -; RV64IM-NEXT: ld a0, 272(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s6, a0, s6 +; RV64IM-NEXT: ld s4, 352(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t3, s4 +; RV64IM-NEXT: ld s4, 344(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s2, s4, s2 +; RV64IM-NEXT: ld s4, 336(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s4, s0 +; RV64IM-NEXT: ld s4, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, s4, t4 +; RV64IM-NEXT: ld s4, 304(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, s4, t1 +; RV64IM-NEXT: ld s4, 296(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, s4, a7 +; RV64IM-NEXT: ld s4, 288(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, s4, a5 +; RV64IM-NEXT: ld s4, 280(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, s4, a2 +; RV64IM-NEXT: ld s4, 272(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, s4, a1 +; RV64IM-NEXT: ld s4, 264(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s6, s4, s6 ; RV64IM-NEXT: xor t3, t3, s2 -; RV64IM-NEXT: ld a0, 152(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor s0, s0, a0 -; RV64IM-NEXT: ld a0, 128(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t4, t4, a0 +; RV64IM-NEXT: ld s2, 152(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor s0, s0, s2 +; RV64IM-NEXT: ld s2, 128(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t4, t4, s2 ; RV64IM-NEXT: xor t1, t1, s3 ; RV64IM-NEXT: xor a7, a7, s1 ; RV64IM-NEXT: xor a5, a5, t2 ; RV64IM-NEXT: xor a2, a2, a6 ; RV64IM-NEXT: xor a1, a1, a3 ; RV64IM-NEXT: xor a3, s6, s7 -; RV64IM-NEXT: ld a0, 240(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a6, t3, a0 -; RV64IM-NEXT: ld a0, 232(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t2, s0, a0 -; RV64IM-NEXT: ld a0, 208(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t3, t4, a0 -; RV64IM-NEXT: ld a0, 192(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t1, t1, a0 -; RV64IM-NEXT: ld a0, 176(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, a0 +; RV64IM-NEXT: ld a6, 248(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a6, t3, a6 +; RV64IM-NEXT: ld t2, 240(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t2, s0, t2 +; RV64IM-NEXT: ld t3, 216(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t3, t4, t3 +; RV64IM-NEXT: ld t4, 200(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t1, t1, t4 +; RV64IM-NEXT: ld t4, 176(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t4 ; RV64IM-NEXT: xor a5, a5, s11 ; RV64IM-NEXT: xor a2, a2, t5 ; RV64IM-NEXT: xor a1, a1, t0 ; RV64IM-NEXT: xor a3, a3, s8 -; RV64IM-NEXT: ld a0, 264(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor t0, t3, a0 -; RV64IM-NEXT: ld a0, 200(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, a0 -; RV64IM-NEXT: ld a0, 168(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, a0 +; RV64IM-NEXT: ld t0, 328(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor t0, t3, t0 +; RV64IM-NEXT: ld t3, 208(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t3 +; RV64IM-NEXT: ld t3, 168(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t3 ; RV64IM-NEXT: xor a2, a2, s5 ; RV64IM-NEXT: xor a1, a1, t6 ; RV64IM-NEXT: xor a3, a3, s9 ; RV64IM-NEXT: xor t2, a6, t2 ; RV64IM-NEXT: xor t0, t2, t0 -; RV64IM-NEXT: ld a0, 256(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a7, a7, a0 -; RV64IM-NEXT: ld a0, 224(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, a0 -; RV64IM-NEXT: ld a0, 184(sp) # 8-byte Folded Reload 
-; RV64IM-NEXT: xor a2, a2, a0 +; RV64IM-NEXT: ld t2, 312(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a7, a7, t2 +; RV64IM-NEXT: ld t2, 232(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, t2 +; RV64IM-NEXT: ld t2, 184(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, t2 ; RV64IM-NEXT: xor a1, a1, ra ; RV64IM-NEXT: xor a3, a3, s10 ; RV64IM-NEXT: xor t0, t0, t1 ; RV64IM-NEXT: xor a4, a7, a4 -; RV64IM-NEXT: ld a0, 248(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a5, a5, a0 -; RV64IM-NEXT: ld a0, 216(sp) # 8-byte Folded Reload -; RV64IM-NEXT: xor a2, a2, a0 -; RV64IM-NEXT: xor a1, a1, s4 -; RV64IM-NEXT: lui a7, %hi(.LCPI18_0) -; RV64IM-NEXT: ld a7, %lo(.LCPI18_0)(a7) +; RV64IM-NEXT: ld a7, 256(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a5, a5, a7 +; RV64IM-NEXT: ld a7, 224(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a2, a2, a7 ; RV64IM-NEXT: slli a6, a6, 56 -; RV64IM-NEXT: ld a0, 320(sp) # 8-byte Folded Reload +; RV64IM-NEXT: ld a7, 192(sp) # 8-byte Folded Reload +; RV64IM-NEXT: xor a1, a1, a7 ; RV64IM-NEXT: xor a0, a3, a0 ; RV64IM-NEXT: ld t1, 368(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a3, t0, t1 @@ -11396,8 +11398,8 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: slli a3, a3, 40 ; RV64IM-NEXT: xor a4, a4, a5 ; RV64IM-NEXT: or a3, a6, a3 -; RV64IM-NEXT: lui t0, 4080 -; RV64IM-NEXT: and a5, a4, t0 +; RV64IM-NEXT: lui a7, 4080 +; RV64IM-NEXT: and a5, a4, a7 ; RV64IM-NEXT: xor a2, a4, a2 ; RV64IM-NEXT: srli a4, a4, 8 ; RV64IM-NEXT: slli a5, a5, 24 @@ -11406,12 +11408,12 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: and a4, a4, a6 ; RV64IM-NEXT: srli a2, a2, 24 ; RV64IM-NEXT: srliw a6, a1, 24 -; RV64IM-NEXT: and a2, a2, t0 -; RV64IM-NEXT: srli t0, a1, 40 +; RV64IM-NEXT: and a2, a2, a7 +; RV64IM-NEXT: srli a7, a1, 40 ; RV64IM-NEXT: xor a0, a1, a0 ; RV64IM-NEXT: slli a6, a6, 32 ; RV64IM-NEXT: or a2, a4, a2 -; RV64IM-NEXT: and a1, t0, t1 +; RV64IM-NEXT: and a1, a7, t1 ; RV64IM-NEXT: srli a0, a0, 56 ; RV64IM-NEXT: or a4, a5, a6 ; RV64IM-NEXT: or a0, a1, a0 @@ -11433,11 +11435,9 @@ define i32 @clmulh_i32(i32 %a, i32 %b) nounwind { ; RV64IM-NEXT: srli a1, a0, 1 ; RV64IM-NEXT: ld a2, 392(sp) # 8-byte Folded Reload ; RV64IM-NEXT: and a0, a0, a2 -; RV64IM-NEXT: and a1, a1, a7 +; RV64IM-NEXT: and a1, a1, a2 ; RV64IM-NEXT: slli a0, a0, 1 ; RV64IM-NEXT: or a0, a1, a0 -; RV64IM-NEXT: slli a0, a0, 31 -; RV64IM-NEXT: srli a0, a0, 32 ; RV64IM-NEXT: ld ra, 504(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s0, 496(sp) # 8-byte Folded Reload ; RV64IM-NEXT: ld s1, 488(sp) # 8-byte Folded Reload From db090117ef32cd782af7e6b3d320e9f5b6d1dc75 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 16:04:45 +0000 Subject: [PATCH 07/13] [LangRef] s/Euclidean mul/mul/ --- llvm/docs/LangRef.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1000693f8261e..6fb9c9c2ae695 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -18415,15 +18415,15 @@ the low-bits. Arguments: """""""""" -The arguments may be any integer type or vector of integer type. Both arguments -and result must have the same type. +The arguments may be any integer type or vector of integer type. Both +arguments and result must have the same type. 
Semantics: """""""""" The '``llvm.clmul``' intrinsic computes carry-less multiply of its arguments, -which is the result of applying the standard Euclidean multiplication algorithm, -where all of the additions are replaced with XORs, and returns the low-bits. +which is the result of applying the standard multiplication algorithm, where +all of the additions are replaced with XORs, and returns the low-bits. The vector variants operate lane-wise. Example: From c2037df86c0b1a136541de4eedd01cf8ba5901db Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 16:23:54 +0000 Subject: [PATCH 08/13] [DAGCombiner] Fix zext-bw related thinko --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 906f4b2a8ec39..90e12ff909029 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10318,13 +10318,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y). SDLoc DL(N); EVT VT = N->getValueType(0); + uint64_t HalfBW = VT.getScalarSizeInBits() / 2; SDValue X, Y; if (sd_match(N, m_Srl(m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))), - m_SpecificInt(VT.getScalarSizeInBits() / 2 - 1)))) + m_SpecificInt(HalfBW - 1))) && + X.getScalarValueSizeInBits() == HalfBW) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y)); if (sd_match(N, m_Srl(m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))), - m_SpecificInt(VT.getScalarSizeInBits() / 2)))) + m_SpecificInt(HalfBW))) && + X.getScalarValueSizeInBits() == HalfBW) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y)); From 23e5e88a972ee8e329c0d73da781be4b05706b13 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 19:04:23 +0000 Subject: [PATCH 09/13] [ISel] Address topperc's review --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 49 ++++++++++--------- .../CodeGen/SelectionDAG/TargetLowering.cpp | 11 ++--- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 90e12ff909029..8077f6ee3a8fa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10315,28 +10315,6 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) return R; - // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y). - SDLoc DL(N); - EVT VT = N->getValueType(0); - uint64_t HalfBW = VT.getScalarSizeInBits() / 2; - SDValue X, Y; - if (sd_match(N, m_Srl(m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))), - m_SpecificInt(HalfBW - 1))) && - X.getScalarValueSizeInBits() == HalfBW) - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, - DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y)); - if (sd_match(N, m_Srl(m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y))), - m_SpecificInt(HalfBW))) && - X.getScalarValueSizeInBits() == HalfBW) - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, - DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y)); - - // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 -> clmulh(x, y). 
- if (sd_match(N, m_Srl(m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)), - m_BitReverse(m_Value(Y)))), - m_SpecificInt(1)))) - return DAG.getNode(ISD::CLMULH, DL, VT, X, Y); - // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. This sort of // thing happens with address calculations, so it's important to canonicalize @@ -10372,6 +10350,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { return SDValue(); // Attempt to fold the constants, shifting the binop RHS by the shift amount. + SDLoc DL(N); + EVT VT = N->getValueType(0); if (SDValue NewRHS = DAG.FoldConstantArithmetic( N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) { SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), @@ -11440,6 +11420,28 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (SDValue AVG = foldShiftToAvg(N, DL)) return AVG; + // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y). + if (VT.getScalarSizeInBits() % 2) + return SDValue(); + uint64_t HalfBW = VT.getScalarSizeInBits() / 2; + SDValue Y; + if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && + sd_match(N1, m_SpecificInt(HalfBW - 1)) && + X.getScalarValueSizeInBits() == HalfBW) + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, + DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y)); + if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && + sd_match(N1, m_SpecificInt(HalfBW)) && + X.getScalarValueSizeInBits() == HalfBW) + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, + DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y)); + + // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 -> clmulh(x, y). + if (sd_match(N0, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)), + m_BitReverse(m_Value(Y))))) && + sd_match(N1, m_SpecificInt(1))) + return DAG.getNode(ISD::CLMULH, DL, VT, X, Y); + return SDValue(); } @@ -11792,8 +11794,7 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { return DAG.getNode(ISD::SRL, DL, VT, X, Y); // fold bitreverse(clmul(bitreverse(x), bitreverse(y))) -> clmulr(x, y) - if (sd_match(N, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)), - m_BitReverse(m_Value(Y)))))) + if (sd_match(N0, m_Clmul(m_BitReverse(m_Value(X)), m_BitReverse(m_Value(Y))))) return DAG.getNode(ISD::CLMULR, DL, VT, X, Y); return SDValue(); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index eda832e981c64..e6207a1ec76b9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8310,10 +8310,6 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const { unsigned BW = VT.getScalarSizeInBits(); unsigned Opcode = Node->getOpcode(); - if (VT.isVector() && - isOperationLegalOrCustomOrPromote(Opcode, VT.getVectorElementType())) - return DAG.UnrollVectorOp(Node); - switch (Opcode) { case ISD::CLMUL: { SDValue Res = DAG.getConstant(0, DL, VT); @@ -8327,9 +8323,10 @@ SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const { } case ISD::CLMULR: case ISD::CLMULH: { - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), 2 * BW); - // For example, ExtVT = i64 based operations aren't legal on rv32; use - // bitreverse-based lowering in this case. + EVT ExtVT = + VT.changeElementType(EVT::getIntegerVT(*DAG.getContext(), 2 * BW)); + // For example, ExtVT = i64 based operations aren't legal on a 32-bit + // target; use bitreverse-based lowering in this case. 
if (!isOperationLegalOrCustom(ISD::ZERO_EXTEND, ExtVT) || !isOperationLegalOrCustom(ISD::SRL, ExtVT)) { SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X); From b9e3ee5d7fe0f9114a5fc165682716d7be90c0bc Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 19:17:17 +0000 Subject: [PATCH 10/13] [DAGCombiner] Avoid early-exit --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8077f6ee3a8fa..2ebb97cce02d5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11420,27 +11420,28 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (SDValue AVG = foldShiftToAvg(N, DL)) return AVG; - // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y). - if (VT.getScalarSizeInBits() % 2) - return SDValue(); - uint64_t HalfBW = VT.getScalarSizeInBits() / 2; - SDValue Y; - if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && - sd_match(N1, m_SpecificInt(HalfBW - 1)) && - X.getScalarValueSizeInBits() == HalfBW) - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, - DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y)); - if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && - sd_match(N1, m_SpecificInt(HalfBW)) && - X.getScalarValueSizeInBits() == HalfBW) - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, - DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y)); - - // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 -> clmulh(x, y). - if (sd_match(N0, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)), - m_BitReverse(m_Value(Y))))) && - sd_match(N1, m_SpecificInt(1))) - return DAG.getNode(ISD::CLMULH, DL, VT, X, Y); + if (VT.getScalarSizeInBits() % 2 == 0) { + // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y). + uint64_t HalfBW = VT.getScalarSizeInBits() / 2; + SDValue Y; + if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && + sd_match(N1, m_SpecificInt(HalfBW - 1)) && + X.getScalarValueSizeInBits() == HalfBW) + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, + DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y)); + if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && + sd_match(N1, m_SpecificInt(HalfBW)) && + X.getScalarValueSizeInBits() == HalfBW) + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, + DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y)); + + // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 -> + // clmulh(x, y). 
+ if (sd_match(N0, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)),
+ m_BitReverse(m_Value(Y))))) &&
+ sd_match(N1, m_SpecificInt(1)))
+ return DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
+ }
 return SDValue();
 }

From af98f14404d56d8a1253bd5fdafbaca242de7032 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra
Date: Thu, 20 Nov 2025 19:27:08 +0000
Subject: [PATCH 11/13] [rvv] Test vector variants of clmul[hr]

---
 llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll | 88 ++++++++++
 .../CodeGen/RISCV/rvv/fixed-vectors-clmul.ll | 154 ++++++++++++++++++
 2 files changed, 242 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
index ff4f1646afd2d..3cb27a5602bbe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/clmul-sdnode.ll
@@ -5547,3 +5547,91 @@ define <vscale x 8 x i64> @clmul_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) nounwind {
 %a = call <vscale x 8 x i64> @llvm.clmul.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
 ret <vscale x 8 x i64> %a
 }
+
+define <vscale x 4 x i8> @clmulr_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) nounwind {
+; CHECK-LABEL: clmulr_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v9
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vand.vi v9, v8, 2
+; CHECK-NEXT: vand.vi v11, v8, 1
+; CHECK-NEXT: vmul.vv v9, v10, v9
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v11, v9
+; CHECK-NEXT: vand.vi v11, v8, 4
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vi v11, v8, 8
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vmul.vv v8, v10, v8
+; CHECK-NEXT: vxor.vv v8, v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 7
+; CHECK-NEXT: ret
+ %a.ext = zext <vscale x 4 x i8> %a to <vscale x 4 x i16>
+ %b.ext = zext <vscale x 4 x i8> %b to <vscale x 4 x i16>
+ %clmul = call <vscale x 4 x i16> @llvm.clmul.nxv4i8(<vscale x 4 x i16> %a.ext, <vscale x 4 x i16> %b.ext)
+ %res.ext = lshr <vscale x 4 x i16> %clmul, splat(i16 7)
+ %res = trunc <vscale x 4 x i16> %res.ext to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %res
+}
+
+define <vscale x 4 x i8> @clmulh_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) nounwind {
+; CHECK-LABEL: clmulh_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v9
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vand.vi v9, v8, 2
+; CHECK-NEXT: vand.vi v11, v8, 1
+; CHECK-NEXT: vmul.vv v9, v10, v9
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v11, v9
+; CHECK-NEXT: vand.vi v11, v8, 4
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vi v11, v8, 8
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vmul.vv v8, v10, v8
+; CHECK-NEXT: vxor.vv v8, v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 8
+; CHECK-NEXT: ret
+ %a.ext = zext <vscale x 4 x i8> %a to <vscale x 4 x i16>
+ %b.ext = zext <vscale x 4 x i8> %b to <vscale x 4 x i16>
+ %clmul = call <vscale x 4 x i16> @llvm.clmul.nxv4i8(<vscale x 4 x i16> %a.ext, <vscale x 4 x i16> %b.ext)
+ %res.ext = lshr <vscale x 4 x i16> %clmul, splat(i16 8)
+ %res = trunc <vscale x 4 x i16> %res.ext to <vscale x 4 x i8>
+ ret <vscale x 4 x i8> %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
index 56379e0b55e10..08c204d8b0cfc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-clmul.ll
@@ -4627,3 +4627,157 @@ define <8 x i64> @clmul_v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
 %a = call <8 x i64> @llvm.clmul.v8i64(<8 x i64> %x, <8 x i64> %y)
 ret <8 x i64> %a
 }
+
+define <4 x i8> @clmulr_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
+; CHECK-LABEL: clmulr_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v9
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vand.vi v9, v8, 2
+; CHECK-NEXT: vand.vi v11, v8, 1
+; CHECK-NEXT: vmul.vv v9, v10, v9
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v11, v9
+; CHECK-NEXT: vand.vi v11, v8, 4
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vi v11, v8, 8
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 256
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 512
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 1024
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: slli a0, a0, 11
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: lui a0, 1
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: lui a0, 2
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: lui a0, 4
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vand.vx v11, v8, a0
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vmul.vv v11, v10, v11
+; CHECK-NEXT: vxor.vv v9, v9, v11
+; CHECK-NEXT: vmul.vv v8, v10, v8
+; CHECK-NEXT: vxor.vv v8, v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 7
+; CHECK-NEXT: ret
+ %a.ext = zext <4 x i8> %a to <4 x i16>
+ %b.ext = zext <4 x i8> %b to <4 x i16>
+ %clmul = call <4 x i16> @llvm.clmul.v4i8(<4 x i16> %a.ext, <4 x i16> %b.ext)
+ %res.ext = lshr <4 x i16> %clmul, splat(i16 7)
+ %res = trunc <4 x i16> %res.ext to <4 x i8>
+ ret <4 x i8> %res
+}
+
+define <4 x i8> @clmulh_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
+; CHECK-LABEL: clmulh_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v9
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vand.vi v9, v8, 2
+; CHECK-NEXT: vand.vi v11, v8, 1
+;
CHECK-NEXT: vmul.vv v9, v10, v9 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v11, v9 +; CHECK-NEXT: vand.vi v11, v8, 4 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vi v11, v8, 8 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: li a0, 256 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: li a0, 512 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: li a0, 1024 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: slli a0, a0, 11 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: lui a0, 2 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vand.vx v11, v8, a0 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vmul.vv v11, v10, v11 +; CHECK-NEXT: vxor.vv v9, v9, v11 +; CHECK-NEXT: vmul.vv v8, v10, v8 +; CHECK-NEXT: vxor.vv v8, v9, v8 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 8 +; CHECK-NEXT: ret + %a.ext = zext <4 x i8> %a to <4 x i16> + %b.ext = zext <4 x i8> %b to <4 x i16> + %clmul = call <4 x i16> @llvm.clmul.v4i8(<4 x i16> %a.ext, <4 x i16> %b.ext) + %res.ext = lshr <4 x i16> %clmul, splat(i16 8) + %res = trunc <4 x i16> %res.ext to <4 x i8> + ret <4 x i8> %res +} From 00329fb458f9ed44bc27670d79f91591e28bf094 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 20 Nov 2025 20:48:23 +0000 Subject: [PATCH 12/13] [DAGCombiner] Fix thinko wrt if-scoping --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2ebb97cce02d5..47c6fa076b429 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11420,10 +11420,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (SDValue AVG = foldShiftToAvg(N, DL)) return AVG; + SDValue Y; if (VT.getScalarSizeInBits() % 2 == 0) { // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y). 
uint64_t HalfBW = VT.getScalarSizeInBits() / 2; - SDValue Y; if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && sd_match(N1, m_SpecificInt(HalfBW - 1)) && X.getScalarValueSizeInBits() == HalfBW) @@ -11434,15 +11434,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { X.getScalarValueSizeInBits() == HalfBW) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y)); - - // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 -> - // clmulh(x, y). - if (sd_match(N0, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)), - m_BitReverse(m_Value(Y))))) && - sd_match(N1, m_SpecificInt(1))) - return DAG.getNode(ISD::CLMULH, DL, VT, X, Y); } + // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 -> + // clmulh(x, y). + if (sd_match(N0, m_BitReverse(m_Clmul(m_BitReverse(m_Value(X)), + m_BitReverse(m_Value(Y))))) && + sd_match(N1, m_SpecificInt(1))) + return DAG.getNode(ISD::CLMULH, DL, VT, X, Y); + return SDValue(); } From ab483105cb59046b0379d002c2116e325a63e0a1 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 21 Nov 2025 08:46:43 +0000 Subject: [PATCH 13/13] [DAGCombiner] Nested ifs --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 47c6fa076b429..9ae7545e32a38 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11425,15 +11425,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // Fold clmul(zext(x), zext(y)) >> (BW - 1 | BW) -> clmul(r|h)(x, y). uint64_t HalfBW = VT.getScalarSizeInBits() / 2; if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && - sd_match(N1, m_SpecificInt(HalfBW - 1)) && - X.getScalarValueSizeInBits() == HalfBW) - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, - DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y)); - if (sd_match(N0, m_Clmul(m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && - sd_match(N1, m_SpecificInt(HalfBW)) && - X.getScalarValueSizeInBits() == HalfBW) - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, - DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y)); + X.getScalarValueSizeInBits() == HalfBW) { + if (sd_match(N1, m_SpecificInt(HalfBW - 1))) + return DAG.getNode( + ISD::ZERO_EXTEND, DL, VT, + DAG.getNode(ISD::CLMULR, DL, X.getValueType(), X, Y)); + if (sd_match(N1, m_SpecificInt(HalfBW))) + return DAG.getNode( + ISD::ZERO_EXTEND, DL, VT, + DAG.getNode(ISD::CLMULH, DL, X.getValueType(), X, Y)); + } } // Fold bitreverse(clmul(bitreverse(x), bitreverse(y))) >> 1 ->