diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index ea0873f41ebba..aef7891af8f93 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -364,6 +364,43 @@ class LegalizerHelper { LLT HalfTy, LLT ShiftAmtTy); + /// Multi-way shift legalization: directly split wide shifts into target-sized + /// parts in a single step, avoiding recursive binary splitting. + LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, + LLT TargetTy); + + /// Optimized path for constant shift amounts using static indexing. + /// Directly calculates which source parts contribute to each output part + /// without generating runtime select chains. + LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, + const APInt &Amt, + LLT TargetTy, + LLT ShiftAmtTy); + + struct ShiftParams { + Register WordShift; // Number of complete words to shift + Register BitShift; // Number of bits to shift within words + Register InvBitShift; // Complement bit shift (TargetBits - BitShift) + Register Zero; // Zero constant for SHL/LSHR fill + Register SignBit; // Sign extension value for ASHR fill + }; + + /// Generates a single output part for constant shifts using direct indexing. + /// Calculates which source parts contribute and how they're combined. + LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, + unsigned NumParts, + ArrayRef<Register> SrcParts, + const ShiftParams &Params, + LLT TargetTy, LLT ShiftAmtTy); + + /// Generates a shift part with carry for variable shifts. + /// Combines main operand shifted by BitShift with carry bits from adjacent + /// operand. 
+ LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, + Register MainOperand, + Register ShiftAmt, LLT TargetTy, + Register CarryOperand = Register()); + LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 008c18837a522..b17fa226e518b 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -5970,7 +5970,6 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, return Legalized; } -// TODO: Optimize if constant shift amount. LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT RequestedTy) { @@ -5992,6 +5991,27 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, if (DstEltSize % 2 != 0) return UnableToLegalize; + // Check if we should use multi-way splitting instead of recursive binary + // splitting. + // + // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit -> + // 4×32-bit) in a single legalization step, avoiding the recursive overhead + // and dependency chains created by usual binary splitting approach + // (128->64->32). + // + // The >= 8 parts threshold ensures we only use this optimization when binary + // splitting would require multiple recursive passes, avoiding overhead for + // simple 2-way splits where binary approach is sufficient. + if (RequestedTy.isValid() && RequestedTy.isScalar() && + DstEltSize % RequestedTy.getSizeInBits() == 0) { + const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits(); + // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive + // steps). + if (NumParts >= 8) + return narrowScalarShiftMultiway(MI, RequestedTy); + } + + // Fall back to binary splitting: // Ignore the input type. We can only go to exactly half the size of the // input. 
If that isn't small enough, the resulting pieces will be further // legalized. @@ -6080,6 +6100,358 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode, + unsigned PartIdx, + unsigned NumParts, + ArrayRef<Register> SrcParts, + const ShiftParams &Params, + LLT TargetTy, LLT ShiftAmtTy) { + auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI); + auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI); + assert(WordShiftConst && BitShiftConst && "Expected constants"); + + const unsigned ShiftWords = WordShiftConst->getZExtValue(); + const unsigned ShiftBits = BitShiftConst->getZExtValue(); + const bool NeedsInterWordShift = ShiftBits != 0; + + switch (Opcode) { + case TargetOpcode::G_SHL: { + // Data moves from lower indices to higher indices + // If this part would come from a source beyond our range, it's zero + if (PartIdx < ShiftWords) + return Params.Zero; + + unsigned SrcIdx = PartIdx - ShiftWords; + if (!NeedsInterWordShift) + return SrcParts[SrcIdx]; + + // Combine shifted main part with carry from previous part + auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift); + if (SrcIdx > 0) { + auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1], + Params.InvBitShift); + return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0); + } + return Hi.getReg(0); + } + + case TargetOpcode::G_LSHR: { + unsigned SrcIdx = PartIdx + ShiftWords; + if (SrcIdx >= NumParts) + return Params.Zero; + if (!NeedsInterWordShift) + return SrcParts[SrcIdx]; + + // Combine shifted main part with carry from next part + auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift); + if (SrcIdx + 1 < NumParts) { + auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1], + Params.InvBitShift); + return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0); + } + return Lo.getReg(0); + } + + case TargetOpcode::G_ASHR: { + // Like LSHR but 
preserves sign bit + unsigned SrcIdx = PartIdx + ShiftWords; + if (SrcIdx >= NumParts) + return Params.SignBit; + if (!NeedsInterWordShift) + return SrcParts[SrcIdx]; + + // Only the original MSB part uses arithmetic shift to preserve sign. All + // other parts use logical shift since they're just moving data bits. + auto Lo = + (SrcIdx == NumParts - 1) + ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift) + : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift); + Register HiSrc = + (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit; + auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift); + return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0); + } + + default: + llvm_unreachable("not a shift"); + } +} + +Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode, + Register MainOperand, + Register ShiftAmt, + LLT TargetTy, + Register CarryOperand) { + // This helper generates a single output part for variable shifts by combining + // the main operand (shifted by BitShift) with carry bits from an adjacent + // part. + + // For G_ASHR, individual parts don't have their own sign bit, only the + // complete value does. So we use LSHR for the main operand shift in ASHR + // context. + unsigned MainOpcode = + (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode; + + // Perform the primary shift on the main operand + Register MainShifted = + MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt}) + .getReg(0); + + // No carry operand available + if (!CarryOperand.isValid()) + return MainShifted; + + // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs, + // so carry bits aren't needed. 
+ LLT ShiftAmtTy = MRI.getType(ShiftAmt); + auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0); + LLT BoolTy = LLT::scalar(1); + auto IsZeroBitShift = + MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst); + + // Extract bits from the adjacent part that will "carry over" into this part. + // The carry direction is opposite to the main shift direction, so we can + // align the two shifted values before combining them with OR. + + // Determine the carry shift opcode (opposite direction) + unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR + : TargetOpcode::G_SHL; + + // Calculate inverse shift amount: BitWidth - ShiftAmt + auto TargetBitsConst = + MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits()); + auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt); + + // Shift the carry operand + Register CarryBits = + MIRBuilder + .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt}) + .getReg(0); + + // If BitShift is 0, don't include carry bits (InvShiftAmt would equal + // TargetBits which would be poison for the individual carry shift operation). + auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0); + Register SafeCarryBits = + MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits) + .getReg(0); + + // Combine the main shifted part with the carry bits + return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI, + const APInt &Amt, + LLT TargetTy, + LLT ShiftAmtTy) { + // Any wide shift can be decomposed into WordShift + BitShift components. + // When shift amount is known constant, directly compute the decomposition + // values and generate constant registers. 
+ Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + + const unsigned DstBits = DstTy.getScalarSizeInBits(); + const unsigned TargetBits = TargetTy.getScalarSizeInBits(); + const unsigned NumParts = DstBits / TargetBits; + + assert(DstBits % TargetBits == 0 && "Target type must evenly divide source"); + + // When the shift amount is known at compile time, we just calculate which + // source parts contribute to each output part. + + SmallVector<Register> SrcParts; + extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI); + + if (Amt.isZero()) { + // No shift needed, just copy + MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts); + MI.eraseFromParent(); + return Legalized; + } + + ShiftParams Params; + const unsigned ShiftWords = Amt.getZExtValue() / TargetBits; + const unsigned ShiftBits = Amt.getZExtValue() % TargetBits; + + // Generate constants and values needed by all shift types + Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0); + Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0); + Params.InvBitShift = + MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0); + Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0); + + // For ASHR, we need the sign-extended value to fill shifted-out positions + if (MI.getOpcode() == TargetOpcode::G_ASHR) + Params.SignBit = + MIRBuilder + .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1], + MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1)) + .getReg(0); + + SmallVector<Register> DstParts(NumParts); + for (unsigned I = 0; I < NumParts; ++I) + DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts, + Params, TargetTy, ShiftAmtTy); + + MIRBuilder.buildMergeLikeInstr(DstReg, DstParts); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) { + Register DstReg 
= MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register AmtReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT ShiftAmtTy = MRI.getType(AmtReg); + + const unsigned DstBits = DstTy.getScalarSizeInBits(); + const unsigned TargetBits = TargetTy.getScalarSizeInBits(); + const unsigned NumParts = DstBits / TargetBits; + + assert(DstBits % TargetBits == 0 && "Target type must evenly divide source"); + assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2"); + + // If the shift amount is known at compile time, we can use direct indexing + // instead of generating select chains in the general case. + if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI)) + return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy, + ShiftAmtTy); + + // For runtime-variable shift amounts, we must generate a more complex + // sequence that handles all possible shift values using select chains. + + // Split the input into target-sized pieces + SmallVector<Register> SrcParts; + extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI); + + // Shifting by zero should be a no-op. + auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0); + LLT BoolTy = LLT::scalar(1); + auto IsZeroShift = + MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst); + + // Any wide shift can be decomposed into two components: + // 1. WordShift: number of complete target-sized words to shift + // 2. 
BitShift: number of bits to shift within each word + // + // Example: 128-bit >> 50 with 32-bit target: + // WordShift = 50 / 32 = 1 (shift right by 1 complete word) + // BitShift = 50 % 32 = 18 (shift each word right by 18 bits) + unsigned TargetBitsLog2 = Log2_32(TargetBits); + auto TargetBitsLog2Const = + MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2); + auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1); + + Register WordShift = + MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0); + Register BitShift = + MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0); + + // Fill values: + // - SHL/LSHR: fill with zeros + // - ASHR: fill with sign-extended MSB + Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0); + + Register FillValue; + if (MI.getOpcode() == TargetOpcode::G_ASHR) { + auto TargetBitsMinusOneConst = + MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1); + FillValue = MIRBuilder + .buildAShr(TargetTy, SrcParts[NumParts - 1], + TargetBitsMinusOneConst) + .getReg(0); + } else { + FillValue = ZeroReg; + } + + SmallVector<Register> DstParts(NumParts); + + // For each output part, generate a select chain that chooses the correct + // result based on the runtime WordShift value. This handles all possible + // word shift amounts by pre-calculating what each would produce. + for (unsigned I = 0; I < NumParts; ++I) { + // Initialize with appropriate default value for this shift type + Register InBoundsResult = FillValue; + + // clang-format off + // Build a branchless select chain by pre-computing results for all possible + // WordShift values (0 to NumParts-1). 
Each iteration nests a new select: + // + // K=0: select(WordShift==0, result0, FillValue) + // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue)) + // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...))) + // clang-format on + for (unsigned K = 0; K < NumParts; ++K) { + auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K); + auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, + WordShift, WordShiftKConst); + + // Calculate source indices for this word shift + // + // For 4-part 128-bit value with K=1 word shift: + // SHL: [3][2][1][0] << K => [2][1][0][Z] + // -> (MainIdx = I-K, CarryIdx = I-K-1) + // LSHR: [3][2][1][0] >> K => [Z][3][2][1] + // -> (MainIdx = I+K, CarryIdx = I+K+1) + int MainSrcIdx; + int CarrySrcIdx; // Index for the word that provides the carried-in bits. + + switch (MI.getOpcode()) { + case TargetOpcode::G_SHL: + MainSrcIdx = (int)I - (int)K; + CarrySrcIdx = MainSrcIdx - 1; + break; + case TargetOpcode::G_LSHR: + case TargetOpcode::G_ASHR: + MainSrcIdx = (int)I + (int)K; + CarrySrcIdx = MainSrcIdx + 1; + break; + default: + llvm_unreachable("Not a shift"); + } + + // Check bounds and build the result for this word shift + Register ResultForK; + if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) { + Register MainOp = SrcParts[MainSrcIdx]; + Register CarryOp; + + // Determine carry operand with bounds checking + if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts) + CarryOp = SrcParts[CarrySrcIdx]; + else if (MI.getOpcode() == TargetOpcode::G_ASHR && + CarrySrcIdx >= (int)NumParts) + CarryOp = FillValue; // Use sign extension + + ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift, + TargetTy, CarryOp); + } else { + // Out of bounds - use fill value for this k + ResultForK = FillValue; + } + + // Select this result if WordShift equals k + InBoundsResult = + MIRBuilder + .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult) + .getReg(0); + } + 
+ // Handle zero-shift special case: if shift is 0, use original input + DstParts[I] = + MIRBuilder + .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult) + .getReg(0); + } + + MIRBuilder.buildMergeLikeInstr(DstReg, DstParts); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/split-wide-shifts-multiway.ll b/llvm/test/CodeGen/AArch64/GlobalISel/split-wide-shifts-multiway.ll new file mode 100644 index 0000000000000..ed68723e470a2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/split-wide-shifts-multiway.ll @@ -0,0 +1,6333 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc %s -o - | FileCheck %s --check-prefixes CHECK,SDAG +; RUN: llc %s -global-isel -global-isel-abort=1 -o - | FileCheck %s --check-prefixes CHECK,GISEL +target datalayout = "e-m:o-i64:64-i512:128-n32:64-S128" +target triple = "arm64-apple-macosx14.0.0" + +define void @test_shl_i512(ptr %result, ptr %input, i32 %shift) { +; SDAG-LABEL: test_shl_i512: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: sub sp, sp, #128 +; SDAG-NEXT: .cfi_def_cfa_offset 128 +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: movi.2d v0, #0000000000000000 +; SDAG-NEXT: ldp q1, q2, [x1] +; SDAG-NEXT: mvn w14, w2 +; SDAG-NEXT: ldr q3, [x1, #32] +; SDAG-NEXT: stp x9, x8, [sp, #112] +; SDAG-NEXT: mov w8, w2 +; SDAG-NEXT: mov x9, sp +; SDAG-NEXT: lsr x10, x8, #3 +; SDAG-NEXT: add x9, x9, #64 +; SDAG-NEXT: stp q0, q0, [sp] +; SDAG-NEXT: stp q0, q0, [sp, #32] +; SDAG-NEXT: and x3, x8, #0x3f +; SDAG-NEXT: and x10, x10, #0x38 +; SDAG-NEXT: stp q2, q3, [sp, #80] +; SDAG-NEXT: eor x3, x3, #0x3f +; SDAG-NEXT: sub x10, x9, x10 +; SDAG-NEXT: str q1, [sp, #64] +; SDAG-NEXT: ldp x9, x11, [x10] +; SDAG-NEXT: ldp x13, x12, [x10, #16] +; SDAG-NEXT: ldp x17, x16, [x10, #32] +; SDAG-NEXT: ldp x10, x2, 
[x10, #48] +; SDAG-NEXT: lsr x15, x11, #1 +; SDAG-NEXT: lsr x1, x12, #1 +; SDAG-NEXT: lsl x11, x11, x8 +; SDAG-NEXT: lsl x12, x12, x8 +; SDAG-NEXT: lsr x4, x16, #1 +; SDAG-NEXT: lsr x15, x15, x14 +; SDAG-NEXT: lsl x5, x17, x8 +; SDAG-NEXT: lsr x6, x10, #1 +; SDAG-NEXT: lsr x1, x1, x14 +; SDAG-NEXT: lsl x10, x10, x8 +; SDAG-NEXT: lsr x14, x4, x14 +; SDAG-NEXT: lsl x2, x2, x8 +; SDAG-NEXT: lsl x16, x16, x8 +; SDAG-NEXT: lsr x4, x6, x3 +; SDAG-NEXT: orr x1, x5, x1 +; SDAG-NEXT: orr x10, x10, x14 +; SDAG-NEXT: lsr x14, x17, #1 +; SDAG-NEXT: orr x17, x2, x4 +; SDAG-NEXT: lsr x2, x9, #1 +; SDAG-NEXT: stp x10, x17, [x0, #48] +; SDAG-NEXT: lsr x10, x13, #1 +; SDAG-NEXT: lsr x14, x14, x3 +; SDAG-NEXT: lsl x13, x13, x8 +; SDAG-NEXT: lsl x8, x9, x8 +; SDAG-NEXT: lsr x10, x10, x3 +; SDAG-NEXT: orr x14, x16, x14 +; SDAG-NEXT: lsr x16, x2, x3 +; SDAG-NEXT: orr x13, x13, x15 +; SDAG-NEXT: stp x1, x14, [x0, #32] +; SDAG-NEXT: orr x10, x12, x10 +; SDAG-NEXT: orr x9, x11, x16 +; SDAG-NEXT: stp x13, x10, [x0, #16] +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: add sp, sp, #128 +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: stp x28, x27, [sp, #-80]! 
; 16-byte Folded Spill +; GISEL-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill +; GISEL-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill +; GISEL-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 80 +; GISEL-NEXT: .cfi_offset w19, -8 +; GISEL-NEXT: .cfi_offset w20, -16 +; GISEL-NEXT: .cfi_offset w21, -24 +; GISEL-NEXT: .cfi_offset w22, -32 +; GISEL-NEXT: .cfi_offset w23, -40 +; GISEL-NEXT: .cfi_offset w24, -48 +; GISEL-NEXT: .cfi_offset w25, -56 +; GISEL-NEXT: .cfi_offset w26, -64 +; GISEL-NEXT: .cfi_offset w27, -72 +; GISEL-NEXT: .cfi_offset w28, -80 +; GISEL-NEXT: ldp x11, x15, [x1] +; GISEL-NEXT: mov w8, w2 +; GISEL-NEXT: lsr x9, x8, #6 +; GISEL-NEXT: and x14, x8, #0x3f +; GISEL-NEXT: mov w13, #64 ; =0x40 +; GISEL-NEXT: sub x16, x13, x14 +; GISEL-NEXT: ldp x3, x6, [x1, #16] +; GISEL-NEXT: lsl x10, x11, x14 +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: ldp x20, x21, [x1, #32] +; GISEL-NEXT: csel x12, x10, xzr, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: lsr x26, x21, x16 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x13, xzr, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: lsr x12, x11, x16 +; GISEL-NEXT: csel x13, x11, x13, eq +; GISEL-NEXT: lsl x11, x15, x14 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x17, xzr, x12, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x17, x11, x17 +; GISEL-NEXT: csel x17, x17, xzr, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: csel x17, x10, x17, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x17, xzr, x17, 
eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x2, xzr, x17, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: lsr x17, x15, x16 +; GISEL-NEXT: csel x15, x15, x2, eq +; GISEL-NEXT: lsl x2, x3, x14 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x4, xzr, x17, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x4, x2, x4 +; GISEL-NEXT: csel x4, x4, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x5, xzr, x12, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x5, x11, x5 +; GISEL-NEXT: csel x4, x5, x4, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x4, x10, x4, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x4, xzr, x4, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x4, xzr, x4, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x4, xzr, x4, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x4, xzr, x4, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x5, xzr, x4, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: lsr x4, x3, x16 +; GISEL-NEXT: csel x3, x3, x5, eq +; GISEL-NEXT: lsl x5, x6, x14 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x7, xzr, x4, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x7, x5, x7 +; GISEL-NEXT: csel x7, x7, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x17, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x19, x2, x19 +; GISEL-NEXT: csel x7, x19, x7, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x12, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x19, x11, x19 +; GISEL-NEXT: csel x7, x19, x7, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x7, x10, x7, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x7, xzr, x7, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x7, xzr, x7, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x7, xzr, x7, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x19, xzr, x7, eq +; 
GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: lsr x7, x6, x16 +; GISEL-NEXT: csel x6, x6, x19, eq +; GISEL-NEXT: lsl x19, x20, x14 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x7, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x22, x19, x22 +; GISEL-NEXT: csel x22, x22, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x23, xzr, x4, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x23, x5, x23 +; GISEL-NEXT: csel x22, x23, x22, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x23, xzr, x17, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x23, x2, x23 +; GISEL-NEXT: csel x22, x23, x22, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x23, xzr, x12, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x23, x11, x23 +; GISEL-NEXT: csel x22, x23, x22, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x22, x10, x22, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x22, xzr, x22, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x22, xzr, x22, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x23, xzr, x22, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: lsr x22, x20, x16 +; GISEL-NEXT: csel x20, x20, x23, eq +; GISEL-NEXT: lsl x23, x21, x14 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x22, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x24, x23, x24 +; GISEL-NEXT: csel x24, x24, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x25, xzr, x7, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x25, x19, x25 +; GISEL-NEXT: csel x24, x25, x24, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x25, xzr, x4, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x25, x5, x25 +; GISEL-NEXT: csel x24, x25, x24, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x25, xzr, x17, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x25, x2, x25 +; GISEL-NEXT: csel x24, x25, x24, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x25, xzr, x12, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x25, x11, x25 +; GISEL-NEXT: csel x24, x25, x24, eq +; GISEL-NEXT: cmp x9, 
#5 +; GISEL-NEXT: csel x24, x10, x24, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: ldp x25, x1, [x1, #48] +; GISEL-NEXT: csel x24, xzr, x24, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: stp x13, x15, [x0] +; GISEL-NEXT: csel x24, xzr, x24, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: stp x3, x6, [x0, #16] +; GISEL-NEXT: csel x21, x21, x24, eq +; GISEL-NEXT: lsl x24, x25, x14 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x27, xzr, x26, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: lsr x16, x25, x16 +; GISEL-NEXT: orr x27, x24, x27 +; GISEL-NEXT: lsl x14, x1, x14 +; GISEL-NEXT: stp x20, x21, [x0, #32] +; GISEL-NEXT: csel x27, x27, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x28, xzr, x22, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x28, x23, x28 +; GISEL-NEXT: csel x27, x28, x27, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x28, xzr, x7, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x28, x19, x28 +; GISEL-NEXT: csel x27, x28, x27, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x28, xzr, x4, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x28, x5, x28 +; GISEL-NEXT: csel x27, x28, x27, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x28, xzr, x17, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x28, x2, x28 +; GISEL-NEXT: csel x27, x28, x27, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x28, xzr, x12, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x28, x11, x28 +; GISEL-NEXT: csel x27, x28, x27, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x27, x10, x27, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x27, xzr, x27, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x25, x25, x27, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x16, xzr, x16, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x14, x14, x16 +; GISEL-NEXT: csel x14, x14, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x16, xzr, x26, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x16, x24, x16 +; GISEL-NEXT: csel x14, x16, x14, eq +; GISEL-NEXT: tst 
x8, #0x3f +; GISEL-NEXT: csel x16, xzr, x22, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x16, x23, x16 +; GISEL-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload +; GISEL-NEXT: csel x14, x16, x14, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x16, xzr, x7, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x16, x19, x16 +; GISEL-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload +; GISEL-NEXT: csel x14, x16, x14, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x16, xzr, x4, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x16, x5, x16 +; GISEL-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload +; GISEL-NEXT: csel x14, x16, x14, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x16, xzr, x17, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x16, x2, x16 +; GISEL-NEXT: csel x13, x16, x14, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x11, x11, x12 +; GISEL-NEXT: csel x11, x11, x13, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x9, x10, x11, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x8, x1, x9, eq +; GISEL-NEXT: stp x25, x8, [x0, #48] +; GISEL-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: ldp x28, x27, [sp], #80 ; 16-byte Folded Reload +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + + %shift_ext = zext i32 %shift to i512 + %shifted = shl i512 %input_val, %shift_ext + + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512(ptr %result, ptr %input, i32 %shift) { +; SDAG-LABEL: test_lshr_i512: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: sub sp, sp, #128 +; SDAG-NEXT: .cfi_def_cfa_offset 128 +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: movi.2d v0, #0000000000000000 +; SDAG-NEXT: ldp q1, q2, [x1] +; SDAG-NEXT: mvn w11, w2 +; SDAG-NEXT: ldr q3, [x1, #32] +; SDAG-NEXT: stp x9, x8, [sp, #48] +; SDAG-NEXT: mov w8, w2 +; SDAG-NEXT: lsr x10, x8, #3 +; SDAG-NEXT: stp q2, q3, [sp, #16] +; 
SDAG-NEXT: and x3, x8, #0x3f +; SDAG-NEXT: stp q0, q0, [sp, #64] +; SDAG-NEXT: eor x3, x3, #0x3f +; SDAG-NEXT: and x9, x10, #0x38 +; SDAG-NEXT: mov x10, sp +; SDAG-NEXT: stp q0, q0, [sp, #96] +; SDAG-NEXT: add x10, x10, x9 +; SDAG-NEXT: str q1, [sp] +; SDAG-NEXT: ldp x13, x16, [x10, #48] +; SDAG-NEXT: ldp x9, x14, [x10, #16] +; SDAG-NEXT: ldp x12, x17, [x10, #32] +; SDAG-NEXT: lsl x4, x16, #1 +; SDAG-NEXT: lsl x2, x13, #1 +; SDAG-NEXT: lsr x13, x13, x8 +; SDAG-NEXT: lsl x15, x9, #1 +; SDAG-NEXT: lsr x16, x16, x8 +; SDAG-NEXT: lsr x9, x9, x8 +; SDAG-NEXT: lsl x1, x12, #1 +; SDAG-NEXT: lsl x4, x4, x3 +; SDAG-NEXT: lsr x12, x12, x8 +; SDAG-NEXT: lsl x15, x15, x11 +; SDAG-NEXT: lsl x1, x1, x11 +; SDAG-NEXT: lsl x11, x2, x11 +; SDAG-NEXT: lsl x2, x17, #1 +; SDAG-NEXT: orr x13, x4, x13 +; SDAG-NEXT: ldp x10, x4, [x10] +; SDAG-NEXT: lsr x17, x17, x8 +; SDAG-NEXT: lsl x2, x2, x3 +; SDAG-NEXT: stp x13, x16, [x0, #48] +; SDAG-NEXT: lsl x16, x14, #1 +; SDAG-NEXT: lsr x14, x14, x8 +; SDAG-NEXT: lsl x13, x4, #1 +; SDAG-NEXT: orr x11, x17, x11 +; SDAG-NEXT: orr x12, x2, x12 +; SDAG-NEXT: lsl x16, x16, x3 +; SDAG-NEXT: lsr x10, x10, x8 +; SDAG-NEXT: stp x12, x11, [x0, #32] +; SDAG-NEXT: lsl x12, x13, x3 +; SDAG-NEXT: lsr x8, x4, x8 +; SDAG-NEXT: orr x11, x14, x1 +; SDAG-NEXT: orr x9, x16, x9 +; SDAG-NEXT: stp x9, x11, [x0, #16] +; SDAG-NEXT: orr x9, x12, x10 +; SDAG-NEXT: orr x8, x8, x15 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: add sp, sp, #128 +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: stp x26, x25, [sp, #-64]! 
; 16-byte Folded Spill +; GISEL-NEXT: stp x24, x23, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill +; GISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 64 +; GISEL-NEXT: .cfi_offset w19, -8 +; GISEL-NEXT: .cfi_offset w20, -16 +; GISEL-NEXT: .cfi_offset w21, -24 +; GISEL-NEXT: .cfi_offset w22, -32 +; GISEL-NEXT: .cfi_offset w23, -40 +; GISEL-NEXT: .cfi_offset w24, -48 +; GISEL-NEXT: .cfi_offset w25, -56 +; GISEL-NEXT: .cfi_offset w26, -64 +; GISEL-NEXT: mov w8, w2 +; GISEL-NEXT: ldp x13, x2, [x1] +; GISEL-NEXT: mov w9, #64 ; =0x40 +; GISEL-NEXT: and x14, x8, #0x3f +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: sub x17, x9, x14 +; GISEL-NEXT: ldp x5, x16, [x1, #16] +; GISEL-NEXT: lsl x10, x2, x17 +; GISEL-NEXT: lsr x9, x8, #6 +; GISEL-NEXT: lsr x11, x13, x14 +; GISEL-NEXT: lsr x24, x2, x14 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: lsl x23, x5, x17 +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x10, x11, x10 +; GISEL-NEXT: lsl x22, x16, x17 +; GISEL-NEXT: lsr x21, x5, x14 +; GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x20, x16, x14 +; GISEL-NEXT: csel x11, xzr, x23, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x11, x24, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldp x15, x11, [x1, #32] +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x22, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: lsl x19, x15, x17 +; GISEL-NEXT: orr x12, x21, x12 +; GISEL-NEXT: lsl x6, x11, x17 +; GISEL-NEXT: csel x10, x12, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x7, x15, x14 +; GISEL-NEXT: csel x12, xzr, x19, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x12, x20, x12 +; GISEL-NEXT: csel x4, x12, x10, eq +; GISEL-NEXT: ldp x12, x10, [x1, #48] +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x6, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x1, x7, x1 +; GISEL-NEXT: lsl x3, x12, x17 +; GISEL-NEXT: lsl x17, 
x10, x17 +; GISEL-NEXT: csel x1, x1, x4, eq +; GISEL-NEXT: lsr x4, x11, x14 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x25, xzr, x3, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x25, x4, x25 +; GISEL-NEXT: csel x25, x25, x1, eq +; GISEL-NEXT: lsr x1, x12, x14 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x26, xzr, x17, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: lsr x14, x10, x14 +; GISEL-NEXT: orr x26, x1, x26 +; GISEL-NEXT: csel x25, x26, x25, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x25, x14, x25, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x13, x13, x25, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x23, xzr, x23, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x23, x24, x23 +; GISEL-NEXT: csel x23, x23, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x22, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x24, x21, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x19, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x24, x20, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x6, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x24, x7, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x3, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x24, x4, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x17, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x24, x1, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x23, x14, x23, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x23, xzr, x23, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x2, x2, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x22, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: stp x13, x2, [x0] +; GISEL-NEXT: orr x21, x21, x22 +; GISEL-NEXT: ldp x24, x23, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: 
csel x21, x21, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x19, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x22, x20, x22 +; GISEL-NEXT: csel x21, x22, x21, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x6, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x22, x7, x22 +; GISEL-NEXT: csel x21, x22, x21, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x3, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x22, x4, x22 +; GISEL-NEXT: csel x21, x22, x21, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x17, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x22, x1, x22 +; GISEL-NEXT: csel x21, x22, x21, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x21, x14, x21, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x21, xzr, x21, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x21, xzr, x21, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x5, x5, x21, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x19, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x19, x20, x19 +; GISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload +; GISEL-NEXT: csel x19, x19, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x6, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x20, x7, x20 +; GISEL-NEXT: csel x19, x20, x19, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x3, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x20, x4, x20 +; GISEL-NEXT: csel x19, x20, x19, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x17, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x20, x1, x20 +; GISEL-NEXT: csel x19, x20, x19, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x19, x14, x19, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x19, xzr, x19, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x19, xzr, x19, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x19, xzr, x19, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x16, x16, x19, eq +; GISEL-NEXT: tst x8, #0x3f +; 
GISEL-NEXT: csel x6, xzr, x6, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: stp x5, x16, [x0, #16] +; GISEL-NEXT: orr x6, x7, x6 +; GISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload +; GISEL-NEXT: csel x6, x6, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x7, xzr, x3, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x7, x4, x7 +; GISEL-NEXT: csel x6, x7, x6, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x7, xzr, x17, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x7, x1, x7 +; GISEL-NEXT: csel x6, x7, x6, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x6, x14, x6, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x6, xzr, x6, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x6, xzr, x6, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x6, xzr, x6, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x6, xzr, x6, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x15, x15, x6, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x3, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x3, x4, x3 +; GISEL-NEXT: csel x3, x3, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x4, xzr, x17, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x4, x1, x4 +; GISEL-NEXT: csel x3, x4, x3, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x3, x14, x3, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x3, xzr, x3, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x3, xzr, x3, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x3, xzr, x3, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x3, xzr, x3, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x3, xzr, x3, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x11, x11, x3, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: stp x15, x11, [x0, #32] +; GISEL-NEXT: orr x17, x1, x17 +; GISEL-NEXT: csel x17, x17, xzr, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: csel x17, x14, x17, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x17, xzr, x17, eq +; 
GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x12, x12, x17, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: csel x14, x14, xzr, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x13, xzr, x14, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x9, xzr, x13, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x8, x10, x9, eq +; GISEL-NEXT: stp x12, x8, [x0, #48] +; GISEL-NEXT: ldp x26, x25, [sp], #64 ; 16-byte Folded Reload +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shift_ext = zext i32 %shift to i512 + %shifted = lshr i512 %input_val, %shift_ext + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512(ptr %result, ptr %input, i32 %shift) { +; SDAG-LABEL: test_ashr_i512: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: sub sp, sp, #128 +; SDAG-NEXT: .cfi_def_cfa_offset 128 +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: mov x11, sp +; SDAG-NEXT: ldp q0, q1, [x1] +; SDAG-NEXT: ldr q2, [x1, #32] +; SDAG-NEXT: stp x9, x8, [sp, #48] +; SDAG-NEXT: asr x9, x8, #63 +; SDAG-NEXT: mov w8, w2 +; SDAG-NEXT: lsr x10, x8, #3 +; SDAG-NEXT: stp q1, q2, [sp, #16] +; SDAG-NEXT: and x3, x8, #0x3f +; SDAG-NEXT: str q0, [sp] +; SDAG-NEXT: eor x3, x3, #0x3f +; SDAG-NEXT: and x10, x10, #0x38 +; SDAG-NEXT: stp x9, x9, [sp, #112] +; SDAG-NEXT: stp x9, x9, [sp, #96] +; SDAG-NEXT: add x10, x11, x10 +; SDAG-NEXT: mvn 
w11, w2 +; SDAG-NEXT: stp x9, x9, [sp, #80] +; SDAG-NEXT: stp x9, x9, [sp, #64] +; SDAG-NEXT: ldp x13, x16, [x10, #48] +; SDAG-NEXT: ldp x9, x14, [x10, #16] +; SDAG-NEXT: ldp x12, x17, [x10, #32] +; SDAG-NEXT: lsl x4, x16, #1 +; SDAG-NEXT: lsl x2, x13, #1 +; SDAG-NEXT: lsr x13, x13, x8 +; SDAG-NEXT: lsl x15, x9, #1 +; SDAG-NEXT: asr x16, x16, x8 +; SDAG-NEXT: lsr x9, x9, x8 +; SDAG-NEXT: lsl x1, x12, #1 +; SDAG-NEXT: lsl x4, x4, x3 +; SDAG-NEXT: lsr x12, x12, x8 +; SDAG-NEXT: lsl x15, x15, x11 +; SDAG-NEXT: lsl x1, x1, x11 +; SDAG-NEXT: lsl x11, x2, x11 +; SDAG-NEXT: lsl x2, x17, #1 +; SDAG-NEXT: orr x13, x4, x13 +; SDAG-NEXT: ldp x10, x4, [x10] +; SDAG-NEXT: lsr x17, x17, x8 +; SDAG-NEXT: lsl x2, x2, x3 +; SDAG-NEXT: stp x13, x16, [x0, #48] +; SDAG-NEXT: lsl x16, x14, #1 +; SDAG-NEXT: lsr x14, x14, x8 +; SDAG-NEXT: lsl x13, x4, #1 +; SDAG-NEXT: orr x11, x17, x11 +; SDAG-NEXT: orr x12, x2, x12 +; SDAG-NEXT: lsl x16, x16, x3 +; SDAG-NEXT: lsr x10, x10, x8 +; SDAG-NEXT: stp x12, x11, [x0, #32] +; SDAG-NEXT: lsl x12, x13, x3 +; SDAG-NEXT: lsr x8, x4, x8 +; SDAG-NEXT: orr x11, x14, x1 +; SDAG-NEXT: orr x9, x16, x9 +; SDAG-NEXT: stp x9, x11, [x0, #16] +; SDAG-NEXT: orr x9, x12, x10 +; SDAG-NEXT: orr x8, x8, x15 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: add sp, sp, #128 +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: stp x28, x27, [sp, #-80]! 
; 16-byte Folded Spill +; GISEL-NEXT: stp x26, x25, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: stp x24, x23, [sp, #32] ; 16-byte Folded Spill +; GISEL-NEXT: stp x22, x21, [sp, #48] ; 16-byte Folded Spill +; GISEL-NEXT: stp x20, x19, [sp, #64] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 80 +; GISEL-NEXT: .cfi_offset w19, -8 +; GISEL-NEXT: .cfi_offset w20, -16 +; GISEL-NEXT: .cfi_offset w21, -24 +; GISEL-NEXT: .cfi_offset w22, -32 +; GISEL-NEXT: .cfi_offset w23, -40 +; GISEL-NEXT: .cfi_offset w24, -48 +; GISEL-NEXT: .cfi_offset w25, -56 +; GISEL-NEXT: .cfi_offset w26, -64 +; GISEL-NEXT: .cfi_offset w27, -72 +; GISEL-NEXT: .cfi_offset w28, -80 +; GISEL-NEXT: mov w8, w2 +; GISEL-NEXT: ldp x14, x4, [x1] +; GISEL-NEXT: mov w9, #64 ; =0x40 +; GISEL-NEXT: and x16, x8, #0x3f +; GISEL-NEXT: lsr x10, x8, #6 +; GISEL-NEXT: sub x15, x9, x16 +; GISEL-NEXT: ldr x9, [x1, #56] +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsl x12, x4, x15 +; GISEL-NEXT: ldp x7, x3, [x1, #16] +; GISEL-NEXT: lsr x13, x14, x16 +; GISEL-NEXT: asr x11, x9, #63 +; GISEL-NEXT: lsr x26, x4, x16 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: lsl x25, x7, x15 +; GISEL-NEXT: orr x12, x13, x12 +; GISEL-NEXT: lsl x23, x3, x15 +; GISEL-NEXT: csel x12, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x24, x7, x16 +; GISEL-NEXT: csel x13, xzr, x25, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: lsr x22, x3, x16 +; GISEL-NEXT: orr x13, x26, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: ldp x17, x13, [x1, #32] +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x2, xzr, x23, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x2, x24, x2 +; GISEL-NEXT: lsl x21, x17, x15 +; GISEL-NEXT: lsl x19, x13, x15 +; GISEL-NEXT: csel x2, x2, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x12, [x1, #48] +; GISEL-NEXT: csel x1, xzr, x21, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: lsr x20, x17, x16 +; GISEL-NEXT: orr x1, x22, x1 +; GISEL-NEXT: lsl x5, x12, 
x15 +; GISEL-NEXT: lsr x6, x13, x16 +; GISEL-NEXT: csel x1, x1, x2, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x2, xzr, x19, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x2, x20, x2 +; GISEL-NEXT: csel x2, x2, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x5, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x27, x6, x1 +; GISEL-NEXT: lsl x1, x9, x15 +; GISEL-NEXT: lsl x15, x11, x15 +; GISEL-NEXT: csel x27, x27, x2, eq +; GISEL-NEXT: lsr x2, x12, x16 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x28, xzr, x1, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: lsr x16, x9, x16 +; GISEL-NEXT: orr x28, x2, x28 +; GISEL-NEXT: csel x27, x28, x27, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x28, xzr, x15, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: orr x28, x16, x28 +; GISEL-NEXT: csel x27, x28, x27, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x14, x14, x27, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x25, xzr, x25, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x25, x26, x25 +; GISEL-NEXT: csel x25, x25, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x26, xzr, x23, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x26, x24, x26 +; GISEL-NEXT: csel x25, x26, x25, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x26, xzr, x21, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x26, x22, x26 +; GISEL-NEXT: csel x25, x26, x25, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x26, xzr, x19, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x26, x20, x26 +; GISEL-NEXT: csel x25, x26, x25, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x26, xzr, x5, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x26, x6, x26 +; GISEL-NEXT: csel x25, x26, x25, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x26, xzr, x1, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x26, x2, x26 +; GISEL-NEXT: csel x25, x26, x25, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x26, xzr, x15, eq +; GISEL-NEXT: cmp x10, #6 +; 
GISEL-NEXT: orr x26, x16, x26 +; GISEL-NEXT: csel x25, x26, x25, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x25, x11, x25, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x4, x4, x25, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x23, xzr, x23, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: stp x14, x4, [x0] +; GISEL-NEXT: orr x23, x24, x23 +; GISEL-NEXT: ldp x26, x25, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: csel x23, x23, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x21, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x24, x22, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x19, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x24, x20, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x5, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x24, x6, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x1, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x24, x2, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x24, xzr, x15, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x24, x16, x24 +; GISEL-NEXT: csel x23, x24, x23, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x23, x11, x23, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x23, x11, x23, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x7, x7, x23, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x21, xzr, x21, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x21, x22, x21 +; GISEL-NEXT: ldp x24, x23, [sp, #32] ; 16-byte Folded Reload +; GISEL-NEXT: csel x21, x21, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x19, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x22, x20, x22 +; GISEL-NEXT: csel x21, x22, x21, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x5, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x22, x6, x22 +; 
GISEL-NEXT: csel x21, x22, x21, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x1, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x22, x2, x22 +; GISEL-NEXT: csel x21, x22, x21, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x22, xzr, x15, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x22, x16, x22 +; GISEL-NEXT: csel x21, x22, x21, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x21, x11, x21, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x21, x11, x21, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x21, x11, x21, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x3, x3, x21, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x19, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: stp x7, x3, [x0, #16] +; GISEL-NEXT: orr x19, x20, x19 +; GISEL-NEXT: ldp x22, x21, [sp, #48] ; 16-byte Folded Reload +; GISEL-NEXT: csel x19, x19, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x5, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x20, x6, x20 +; GISEL-NEXT: csel x19, x20, x19, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x1, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x20, x2, x20 +; GISEL-NEXT: csel x19, x20, x19, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x15, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x20, x16, x20 +; GISEL-NEXT: csel x19, x20, x19, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: csel x19, x11, x19, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x19, x11, x19, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x19, x11, x19, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x19, x11, x19, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x17, x17, x19, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x5, xzr, x5, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x5, x6, x5 +; GISEL-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload +; GISEL-NEXT: csel x5, x5, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x6, xzr, x1, eq +; 
GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x6, x2, x6 +; GISEL-NEXT: csel x5, x6, x5, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x6, xzr, x15, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x6, x16, x6 +; GISEL-NEXT: csel x5, x6, x5, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: csel x5, x11, x5, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: csel x5, x11, x5, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x5, x11, x5, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x5, x11, x5, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x5, x11, x5, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x13, x13, x5, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: stp x17, x13, [x0, #32] +; GISEL-NEXT: orr x1, x2, x1 +; GISEL-NEXT: csel x1, x1, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x2, xzr, x15, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x2, x16, x2 +; GISEL-NEXT: csel x1, x2, x1, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: csel x1, x11, x1, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: csel x1, x11, x1, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: csel x1, x11, x1, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x1, x11, x1, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x1, x11, x1, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x1, x11, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x12, x12, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x15, x16, x15 +; GISEL-NEXT: csel x15, x15, x11, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: csel x15, x11, x15, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: csel x15, x11, x15, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: csel x14, x11, x15, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: csel x14, x11, x14, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x14, x11, x14, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x14, x11, x14, eq +; GISEL-NEXT: cmp 
x10, #7 +; GISEL-NEXT: csel x10, x11, x14, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x8, x9, x10, eq +; GISEL-NEXT: stp x12, x8, [x0, #48] +; GISEL-NEXT: ldp x28, x27, [sp], #80 ; 16-byte Folded Reload +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shift_ext = zext i32 %shift to i512 + %shifted = ashr i512 %input_val, %shift_ext + store i512 %shifted, ptr %result, align 64 + ret void +} + +; i1024 shift functions +define void @test_shl_i1024(ptr %result, ptr %input, i32 %shift) { +; SDAG-LABEL: test_shl_i1024: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: sub sp, sp, #352 +; SDAG-NEXT: stp x28, x27, [sp, #256] ; 16-byte Folded Spill +; SDAG-NEXT: stp x26, x25, [sp, #272] ; 16-byte Folded Spill +; SDAG-NEXT: stp x24, x23, [sp, #288] ; 16-byte Folded Spill +; SDAG-NEXT: stp x22, x21, [sp, #304] ; 16-byte Folded Spill +; SDAG-NEXT: stp x20, x19, [sp, #320] ; 16-byte Folded Spill +; SDAG-NEXT: stp x29, x30, [sp, #336] ; 16-byte Folded Spill +; SDAG-NEXT: .cfi_def_cfa_offset 352 +; SDAG-NEXT: .cfi_offset w30, -8 +; SDAG-NEXT: .cfi_offset w29, -16 +; SDAG-NEXT: .cfi_offset w19, -24 +; SDAG-NEXT: .cfi_offset w20, -32 +; SDAG-NEXT: .cfi_offset w21, -40 +; SDAG-NEXT: .cfi_offset w22, -48 +; SDAG-NEXT: .cfi_offset w23, -56 +; SDAG-NEXT: .cfi_offset w24, -64 +; SDAG-NEXT: .cfi_offset w25, -72 +; SDAG-NEXT: .cfi_offset w26, -80 +; SDAG-NEXT: .cfi_offset w27, -88 +; SDAG-NEXT: .cfi_offset w28, -96 +; SDAG-NEXT: ldp x8, x9, [x1, #112] +; SDAG-NEXT: movi.2d v0, #0000000000000000 +; SDAG-NEXT: ldp q1, q2, [x1] +; SDAG-NEXT: mov x10, sp +; SDAG-NEXT: ldp q3, q4, [x1, #32] +; SDAG-NEXT: add x10, x10, #128 +; SDAG-NEXT: ldp q5, q6, [x1, #64] +; SDAG-NEXT: mvn w4, w2 +; SDAG-NEXT: ldr q7, [x1, #96] +; SDAG-NEXT: stp x8, x9, [sp, #240] +; SDAG-NEXT: mov w8, w2 +; SDAG-NEXT: lsr x9, x8, #3 +; SDAG-NEXT: stp q0, q0, [sp] +; SDAG-NEXT: stp q0, q0, [sp, #32] +; SDAG-NEXT: ldp x29, x30, [sp, #336] ; 16-byte Folded Reload +; SDAG-NEXT: and x9, x9, #0x78 +; 
SDAG-NEXT: stp q0, q0, [sp, #64] +; SDAG-NEXT: stp q0, q0, [sp, #96] +; SDAG-NEXT: sub x1, x10, x9 +; SDAG-NEXT: and x10, x8, #0x3f +; SDAG-NEXT: stp q2, q3, [sp, #144] +; SDAG-NEXT: eor x10, x10, #0x3f +; SDAG-NEXT: stp q4, q5, [sp, #176] +; SDAG-NEXT: stp q6, q7, [sp, #208] +; SDAG-NEXT: str q1, [sp, #128] +; SDAG-NEXT: ldp x6, x19, [x1, #64] +; SDAG-NEXT: ldr x26, [x1, #96] +; SDAG-NEXT: ldp x22, x23, [x1, #80] +; SDAG-NEXT: ldp x27, x24, [x1, #104] +; SDAG-NEXT: lsr x20, x6, #1 +; SDAG-NEXT: lsr x21, x19, #1 +; SDAG-NEXT: lsl x19, x19, x8 +; SDAG-NEXT: ldp x9, x13, [x1] +; SDAG-NEXT: lsl x25, x22, x8 +; SDAG-NEXT: lsr x20, x20, x10 +; SDAG-NEXT: ldp x11, x14, [x1, #16] +; SDAG-NEXT: ldp x12, x15, [x1, #32] +; SDAG-NEXT: lsr x21, x21, x4 +; SDAG-NEXT: ldp x17, x2, [x1, #48] +; SDAG-NEXT: orr x19, x19, x20 +; SDAG-NEXT: ldr x1, [x1, #120] +; SDAG-NEXT: lsr x20, x24, #1 +; SDAG-NEXT: lsr x16, x13, #1 +; SDAG-NEXT: lsr x3, x14, #1 +; SDAG-NEXT: lsr x5, x15, #1 +; SDAG-NEXT: orr x21, x25, x21 +; SDAG-NEXT: lsr x7, x2, #1 +; SDAG-NEXT: lsr x25, x23, #1 +; SDAG-NEXT: lsr x28, x27, #1 +; SDAG-NEXT: lsl x1, x1, x8 +; SDAG-NEXT: lsr x20, x20, x10 +; SDAG-NEXT: lsr x16, x16, x4 +; SDAG-NEXT: lsr x3, x3, x4 +; SDAG-NEXT: lsr x5, x5, x4 +; SDAG-NEXT: lsr x7, x7, x4 +; SDAG-NEXT: lsr x22, x22, #1 +; SDAG-NEXT: lsr x25, x25, x4 +; SDAG-NEXT: lsr x4, x28, x4 +; SDAG-NEXT: orr x1, x1, x20 +; SDAG-NEXT: lsl x20, x23, x8 +; SDAG-NEXT: lsl x23, x24, x8 +; SDAG-NEXT: lsr x28, x26, #1 +; SDAG-NEXT: lsr x22, x22, x10 +; SDAG-NEXT: lsl x24, x27, x8 +; SDAG-NEXT: orr x4, x23, x4 +; SDAG-NEXT: lsl x6, x6, x8 +; SDAG-NEXT: lsl x2, x2, x8 +; SDAG-NEXT: lsr x27, x28, x10 +; SDAG-NEXT: stp x4, x1, [x0, #112] +; SDAG-NEXT: lsl x1, x26, x8 +; SDAG-NEXT: orr x20, x20, x22 +; SDAG-NEXT: lsr x4, x9, #1 +; SDAG-NEXT: lsl x13, x13, x8 +; SDAG-NEXT: orr x22, x24, x27 +; SDAG-NEXT: orr x1, x1, x25 +; SDAG-NEXT: stp x21, x20, [x0, #80] +; SDAG-NEXT: lsr x20, x17, #1 +; SDAG-NEXT: stp x1, x22, [x0, 
#96] +; SDAG-NEXT: lsr x1, x11, #1 +; SDAG-NEXT: lsr x21, x12, #1 +; SDAG-NEXT: lsl x14, x14, x8 +; SDAG-NEXT: lsl x15, x15, x8 +; SDAG-NEXT: lsr x20, x20, x10 +; SDAG-NEXT: lsl x17, x17, x8 +; SDAG-NEXT: orr x6, x6, x7 +; SDAG-NEXT: lsr x7, x21, x10 +; SDAG-NEXT: lsl x12, x12, x8 +; SDAG-NEXT: lsr x1, x1, x10 +; SDAG-NEXT: lsl x11, x11, x8 +; SDAG-NEXT: lsr x10, x4, x10 +; SDAG-NEXT: stp x6, x19, [x0, #64] +; SDAG-NEXT: orr x2, x2, x20 +; SDAG-NEXT: lsl x8, x9, x8 +; SDAG-NEXT: orr x17, x17, x5 +; SDAG-NEXT: ldp x20, x19, [sp, #320] ; 16-byte Folded Reload +; SDAG-NEXT: orr x15, x15, x7 +; SDAG-NEXT: ldp x22, x21, [sp, #304] ; 16-byte Folded Reload +; SDAG-NEXT: orr x12, x12, x3 +; SDAG-NEXT: ldp x24, x23, [sp, #288] ; 16-byte Folded Reload +; SDAG-NEXT: orr x14, x14, x1 +; SDAG-NEXT: ldp x26, x25, [sp, #272] ; 16-byte Folded Reload +; SDAG-NEXT: orr x11, x11, x16 +; SDAG-NEXT: ldp x28, x27, [sp, #256] ; 16-byte Folded Reload +; SDAG-NEXT: orr x9, x13, x10 +; SDAG-NEXT: stp x17, x2, [x0, #48] +; SDAG-NEXT: stp x12, x15, [x0, #32] +; SDAG-NEXT: stp x11, x14, [x0, #16] +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: add sp, sp, #352 +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i1024: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: sub sp, sp, #416 +; GISEL-NEXT: stp x28, x27, [sp, #320] ; 16-byte Folded Spill +; GISEL-NEXT: stp x26, x25, [sp, #336] ; 16-byte Folded Spill +; GISEL-NEXT: stp x24, x23, [sp, #352] ; 16-byte Folded Spill +; GISEL-NEXT: stp x22, x21, [sp, #368] ; 16-byte Folded Spill +; GISEL-NEXT: stp x20, x19, [sp, #384] ; 16-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #400] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 416 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset w19, -24 +; GISEL-NEXT: .cfi_offset w20, -32 +; GISEL-NEXT: .cfi_offset w21, -40 +; GISEL-NEXT: .cfi_offset w22, -48 +; GISEL-NEXT: .cfi_offset w23, -56 +; GISEL-NEXT: .cfi_offset w24, -64 +; GISEL-NEXT: .cfi_offset w25, -72 
+; GISEL-NEXT: .cfi_offset w26, -80 +; GISEL-NEXT: .cfi_offset w27, -88 +; GISEL-NEXT: .cfi_offset w28, -96 +; GISEL-NEXT: ldp x10, x11, [x1] +; GISEL-NEXT: mov w8, w2 +; GISEL-NEXT: lsr x9, x8, #6 +; GISEL-NEXT: and x16, x8, #0x3f +; GISEL-NEXT: mov w13, #64 ; =0x40 +; GISEL-NEXT: sub x21, x13, x16 +; GISEL-NEXT: str x0, [sp, #112] ; 8-byte Folded Spill +; GISEL-NEXT: mov x24, x16 +; GISEL-NEXT: lsl x25, x10, x16 +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: lsr x26, x10, x21 +; GISEL-NEXT: lsl x2, x11, x16 +; GISEL-NEXT: lsr x23, x11, x21 +; GISEL-NEXT: mov x22, x21 +; GISEL-NEXT: csel x12, x25, xzr, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: str x1, [sp, #312] ; 8-byte Folded Spill +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: str x23, [sp, #208] ; 8-byte Folded Spill +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: stp x24, x22, [sp, #40] ; 16-byte Folded Spill +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x10, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x10, [sp, #192] ; 8-byte Folded Spill +; GISEL-NEXT: csel x10, xzr, x26, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x10, x2, x10 +; 
GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: csel x10, x25, x10, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x13, xzr, x10, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: ldp x12, x10, [x1, #16] +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: lsl x20, x12, x16 +; GISEL-NEXT: csel x11, x11, x13, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [sp, #184] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x23, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x11, x20, x11 +; GISEL-NEXT: lsr x15, x12, x21 +; GISEL-NEXT: lsl x14, x10, x16 +; GISEL-NEXT: csel x11, x11, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x17, x10, x21 +; GISEL-NEXT: csel x13, xzr, x26, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: str x20, [sp, #8] ; 8-byte Folded Spill +; GISEL-NEXT: orr x13, x2, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x11, x25, x11, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x11, xzr, x11, eq +; 
GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [sp, #176] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x15, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x11, x14, x11 +; GISEL-NEXT: csel x11, x11, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x23, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x12, x20, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x26, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x12, x2, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x11, x25, x11, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x13, xzr, x11, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: ldp x12, x11, [x1, #32] +; GISEL-NEXT: csel x13, xzr, x13, 
eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: lsl x0, x12, x16 +; GISEL-NEXT: csel x10, x10, x13, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x10, [sp, #168] ; 8-byte Folded Spill +; GISEL-NEXT: csel x10, xzr, x17, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x10, x0, x10 +; GISEL-NEXT: lsr x27, x12, x21 +; GISEL-NEXT: lsl x19, x11, x16 +; GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x3, x11, x21 +; GISEL-NEXT: csel x13, xzr, x15, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: stp x27, x0, [sp, #240] ; 16-byte Folded Spill +; GISEL-NEXT: orr x13, x14, x13 +; GISEL-NEXT: mov x7, x3 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x23, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x13, x20, x13 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x26, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x13, x2, x13 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x10, x25, x10, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x12, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x10, [sp, #160] ; 8-byte Folded Spill +; GISEL-NEXT: 
csel x10, xzr, x27, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x10, x19, x10 +; GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x17, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x12, x0, x12 +; GISEL-NEXT: csel x10, x12, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x15, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x12, x14, x12 +; GISEL-NEXT: csel x10, x12, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x23, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x12, x20, x12 +; GISEL-NEXT: csel x10, x12, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x26, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x12, x2, x12 +; GISEL-NEXT: csel x10, x12, x10, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x10, x25, x10, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x13, xzr, x10, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: ldp x12, x10, [x1, #48] +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: lsl x4, x12, x16 +; GISEL-NEXT: csel x11, x11, x13, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [sp, #152] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x3, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x11, x4, x11 +; GISEL-NEXT: lsl x30, x10, x16 +; GISEL-NEXT: lsr x28, x10, x21 +; GISEL-NEXT: csel x11, x11, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x27, eq +; GISEL-NEXT: cmp x9, #1 +; 
GISEL-NEXT: str x30, [sp, #200] ; 8-byte Folded Spill +; GISEL-NEXT: orr x13, x19, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x17, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x13, x0, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x15, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x13, x14, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x23, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x13, x20, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x26, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x13, x2, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: lsr x13, x12, x21 +; GISEL-NEXT: csel x11, x25, x11, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: mov x6, x13 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: str x6, [sp, #256] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [sp, #144] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x13, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x11, x30, x11 +; GISEL-NEXT: csel x11, x11, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x3, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x12, x4, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; 
GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x27, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x12, x19, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x17, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x12, x0, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x15, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x12, x14, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x23, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x12, x20, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x26, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x12, x2, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x11, x25, x11, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x12, xzr, x11, eq +; GISEL-NEXT: ldp x11, x5, [x1, #64] +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x12, x10, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsl x21, x11, x16 +; GISEL-NEXT: str x12, [sp, #136] ; 8-byte Folded Spill +; GISEL-NEXT: csel x12, xzr, x28, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x12, x21, x12 +; GISEL-NEXT: lsr x10, x11, x22 +; GISEL-NEXT: mov x16, x19 +; GISEL-NEXT: csel x12, x12, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: mov x1, x16 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: str x16, [sp, #304] ; 8-byte Folded Spill +; 
GISEL-NEXT: orr x13, x30, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x3, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: lsl x3, x5, x24 +; GISEL-NEXT: orr x13, x4, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: stp x21, x3, [sp, #216] ; 16-byte Folded Spill +; GISEL-NEXT: csel x13, xzr, x27, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x13, x19, x13 +; GISEL-NEXT: mov x19, x28 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x17, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x13, x0, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x15, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x13, x14, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x23, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x13, x20, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x26, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x13, x2, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x12, x25, x12, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x11, x11, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [sp, #128] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x10, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x11, x3, x11 +; GISEL-NEXT: csel x11, x11, xzr, eq +; GISEL-NEXT: tst x8, #0x3f 
+; GISEL-NEXT: csel x12, xzr, x28, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: mov x28, x4 +; GISEL-NEXT: orr x12, x21, x12 +; GISEL-NEXT: str x28, [sp, #32] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x6, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x12, x30, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x7, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x12, x4, x12 +; GISEL-NEXT: mov x4, x20 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x27, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: mov x27, x2 +; GISEL-NEXT: orr x12, x16, x12 +; GISEL-NEXT: mov x16, x17 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x17, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: mov x17, x15 +; GISEL-NEXT: orr x12, x0, x12 +; GISEL-NEXT: lsr x0, x5, x22 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x15, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: ldr x15, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: orr x12, x14, x12 +; GISEL-NEXT: str x0, [sp, #280] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x23, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: mov x23, x25 +; GISEL-NEXT: orr x12, x20, x12 +; GISEL-NEXT: str x23, [sp, #288] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x26, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x12, x2, x12 +; GISEL-NEXT: mov x2, x3 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x11, x25, x11, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: mov x25, x26 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel 
x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x12, xzr, x11, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x12, x5, x12, eq +; GISEL-NEXT: ldp x11, x5, [x15, #80] +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x12, [sp, #120] ; 8-byte Folded Spill +; GISEL-NEXT: mov x15, x7 +; GISEL-NEXT: csel x12, xzr, x0, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: str x15, [sp, #24] ; 8-byte Folded Spill +; GISEL-NEXT: lsl x20, x11, x24 +; GISEL-NEXT: orr x12, x20, x12 +; GISEL-NEXT: str x20, [sp, #232] ; 8-byte Folded Spill +; GISEL-NEXT: csel x12, x12, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x10, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x13, x3, x13 +; GISEL-NEXT: lsl x3, x5, x24 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x19, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: stp x19, x3, [sp, #264] ; 16-byte Folded Spill +; GISEL-NEXT: orr x13, x21, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x6, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x13, x30, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x7, eq +; GISEL-NEXT: ldp x7, x30, [sp, #240] ; 16-byte Folded Reload +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x13, x28, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x7, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x13, x1, x13 +; GISEL-NEXT: mov x1, x14 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x16, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x13, x30, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x17, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x13, x14, x13 +; 
GISEL-NEXT: ldr x14, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x14, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x13, x4, x13 +; GISEL-NEXT: mov x4, x10 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x26, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: mov x26, x27 +; GISEL-NEXT: orr x13, x27, x13 +; GISEL-NEXT: lsr x27, x11, x22 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: mov x13, x23 +; GISEL-NEXT: csel x12, x23, x12, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: str x27, [sp, #64] ; 8-byte Folded Spill +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: mov x23, x20 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x11, x11, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [sp, #104] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x27, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x11, x3, x11 +; GISEL-NEXT: csel x11, x11, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x0, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: mov x0, x7 +; GISEL-NEXT: orr x12, x20, x12 +; GISEL-NEXT: mov x20, x16 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x10, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: ldr x10, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: orr x12, x2, x12 +; GISEL-NEXT: ldr x2, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x19, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x12, x21, x12 +; GISEL-NEXT: ldr x21, [sp, #200] ; 8-byte Folded Reload +; GISEL-NEXT: csel x11, 
x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x6, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x12, x21, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x15, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x12, x28, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x7, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: mov x7, x17 +; GISEL-NEXT: orr x12, x2, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x16, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x12, x30, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x17, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: mov x17, x24 +; GISEL-NEXT: orr x12, x1, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x14, eq +; GISEL-NEXT: ldr x14, [sp, #8] ; 8-byte Folded Reload +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x12, x14, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x25, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: orr x12, x26, x12 +; GISEL-NEXT: csel x11, x12, x11, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x12, xzr, x11, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: ldp x11, x10, [x10, #96] +; GISEL-NEXT: csel x12, x5, x12, eq +; GISEL-NEXT: str x12, [sp, #96] ; 8-byte Folded Spill +; GISEL-NEXT: mov x12, x22 +; GISEL-NEXT: lsr x22, x5, x22 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: mov x5, x27 +; GISEL-NEXT: lsl x24, x11, x24 +; GISEL-NEXT: str x10, [sp, #296] ; 8-byte Folded Spill +; GISEL-NEXT: csel x10, xzr, x22, eq +; 
GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: str x22, [sp, #16] ; 8-byte Folded Spill +; GISEL-NEXT: orr x10, x24, x10 +; GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x27, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: ldr x27, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: orr x13, x3, x13 +; GISEL-NEXT: mov x3, x26 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x27, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x13, x23, x13 +; GISEL-NEXT: mov x23, x4 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x4, eq +; GISEL-NEXT: ldp x4, x16, [sp, #216] ; 16-byte Folded Reload +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x13, x16, x13 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x19, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: mov x19, x1 +; GISEL-NEXT: orr x13, x4, x13 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x6, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: mov x6, x14 +; GISEL-NEXT: orr x13, x21, x13 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x15, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x13, x28, x13 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x0, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: mov x0, x23 +; GISEL-NEXT: orr x13, x2, x13 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x20, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x13, x30, x13 +; GISEL-NEXT: ldr x30, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x7, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x13, x1, x13 +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x30, eq 
+; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: orr x13, x14, x13 +; GISEL-NEXT: ldp x14, x2, [sp, #264] ; 16-byte Folded Reload +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x25, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: orr x13, x26, x13 +; GISEL-NEXT: ldr x26, [sp, #288] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x13, x10, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: lsr x13, x11, x12 +; GISEL-NEXT: csel x10, x26, x10, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: str x13, [sp, #72] ; 8-byte Folded Spill +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x10, [sp, #88] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x10, [sp, #296] ; 8-byte Folded Reload +; GISEL-NEXT: lsl x11, x10, x17 +; GISEL-NEXT: csel x10, xzr, x13, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: ldr x17, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x13, [sp, #256] ; 8-byte Folded Reload +; GISEL-NEXT: orr x10, x11, x10 +; GISEL-NEXT: str x11, [sp, #56] ; 8-byte Folded Spill +; GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x22, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x11, x24, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x5, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x11, x2, x11 +; GISEL-NEXT: ldp x12, x5, [sp, #240] ; 16-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x27, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: mov x27, x30 +; GISEL-NEXT: orr x11, x17, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x23, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: mov x23, x20 +; GISEL-NEXT: 
orr x11, x16, x11 +; GISEL-NEXT: ldr x16, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x14, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x11, x4, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x13, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x11, x21, x11 +; GISEL-NEXT: ldr x21, [sp, #296] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x15, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x11, x28, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x12, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x11, x16, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x20, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x11, x5, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x7, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: orr x11, x1, x11 +; GISEL-NEXT: ldr x1, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x30, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: orr x11, x6, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x25, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: orr x11, x3, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x10, x26, x10, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x11, xzr, x10, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x11, x21, x11, eq +; GISEL-NEXT: ldp x10, x20, [x1, #112] +; GISEL-NEXT: str x11, [sp, #80] ; 8-byte Folded Spill +; GISEL-NEXT: ldp x11, x4, [sp, #40] ; 16-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: 
lsr x21, x21, x4 +; GISEL-NEXT: lsl x28, x10, x11 +; GISEL-NEXT: csel x1, xzr, x21, eq +; GISEL-NEXT: str x21, [sp, #296] ; 8-byte Folded Spill +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x1, x28, x1 +; GISEL-NEXT: ldr x21, [sp, #72] ; 8-byte Folded Reload +; GISEL-NEXT: str x28, [sp, #312] ; 8-byte Folded Spill +; GISEL-NEXT: csel x1, x1, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x28, [sp, #56] ; 8-byte Folded Reload +; GISEL-NEXT: csel x30, xzr, x21, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x30, x28, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x22, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: ldr x22, [sp, #64] ; 8-byte Folded Reload +; GISEL-NEXT: orr x30, x24, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x22, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x30, x2, x30 +; GISEL-NEXT: ldr x2, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x2, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x30, x17, x30 +; GISEL-NEXT: ldr x17, [sp, #224] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x0, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x30, x17, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x14, eq +; GISEL-NEXT: ldr x14, [sp, #216] ; 8-byte Folded Reload +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x30, x14, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x13, eq +; GISEL-NEXT: ldr x13, [sp, #200] ; 8-byte Folded Reload +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x30, x13, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x15, eq +; GISEL-NEXT: ldr x15, [sp, #32] ; 8-byte Folded Reload +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr 
x30, x15, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x12, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x30, x16, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x23, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: orr x30, x5, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x7, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: orr x30, x19, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x27, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: orr x30, x6, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x30, xzr, x25, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: orr x30, x3, x30 +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: lsr x30, x10, x4 +; GISEL-NEXT: csel x1, x26, x1, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x26, x10, x1, eq +; GISEL-NEXT: lsl x10, x20, x11 +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x16, xzr, x30, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: ldr x11, [sp, #296] ; 8-byte Folded Reload +; GISEL-NEXT: orr x10, x10, x16 +; GISEL-NEXT: ldr x16, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x11, x16, x11 +; GISEL-NEXT: ldr x16, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x21, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x11, x28, x11 +; GISEL-NEXT: ldp x29, x30, [sp, #400] ; 16-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #3 +; 
GISEL-NEXT: orr x11, x24, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x22, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x11, x16, x11 +; GISEL-NEXT: ldr x16, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x2, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x11, x16, x11 +; GISEL-NEXT: ldp x22, x21, [sp, #368] ; 16-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x0, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x11, x17, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldr x11, [sp, #264] ; 8-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x11, x14, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldr x11, [sp, #256] ; 8-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x11, x13, x11 +; GISEL-NEXT: ldr x13, [sp, #112] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldr x11, [sp, #24] ; 8-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x11, x15, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x12, eq +; GISEL-NEXT: ldr x12, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: orr x11, x12, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldr x11, [sp, #192] ; 8-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [x13] +; GISEL-NEXT: ldp x12, x11, [sp, #176] ; 16-byte Folded Reload +; GISEL-NEXT: stp x11, x12, [x13, #8] +; GISEL-NEXT: csel x11, xzr, x23, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: orr x11, x5, x11 +; GISEL-NEXT: ldp x24, x23, [sp, #352] ; 16-byte Folded 
Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldr x11, [sp, #168] ; 8-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [x13, #24] +; GISEL-NEXT: ldp x12, x11, [sp, #152] ; 16-byte Folded Reload +; GISEL-NEXT: stp x11, x12, [x13, #32] +; GISEL-NEXT: csel x11, xzr, x7, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: orr x11, x19, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldr x11, [sp, #144] ; 8-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [x13, #48] +; GISEL-NEXT: ldp x12, x11, [sp, #128] ; 16-byte Folded Reload +; GISEL-NEXT: stp x11, x12, [x13, #56] +; GISEL-NEXT: csel x11, xzr, x27, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: orr x11, x6, x11 +; GISEL-NEXT: ldp x28, x27, [sp, #320] ; 16-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: ldr x11, [sp, #120] ; 8-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x11, [x13, #72] +; GISEL-NEXT: ldp x12, x11, [sp, #96] ; 16-byte Folded Reload +; GISEL-NEXT: stp x11, x12, [x13, #80] +; GISEL-NEXT: csel x11, xzr, x25, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: orr x11, x3, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: ldr x9, [sp, #288] ; 8-byte Folded Reload +; GISEL-NEXT: ldr x11, [sp, #88] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x9, x10, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: ldr x8, [sp, #80] ; 8-byte Folded Reload +; GISEL-NEXT: stp x11, x8, [x13, #96] +; GISEL-NEXT: csel x8, x20, x9, eq +; GISEL-NEXT: stp x26, x8, [x13, #112] +; GISEL-NEXT: ldp x20, x19, [sp, #384] ; 16-byte Folded Reload +; GISEL-NEXT: ldp x26, x25, [sp, #336] ; 16-byte Folded Reload +; GISEL-NEXT: add sp, sp, #416 +; GISEL-NEXT: ret +entry: + %input_val = load i1024, ptr %input, align 128 + %shift_ext = zext i32 %shift to i1024 + %shifted = shl i1024 %input_val, %shift_ext + store i1024 %shifted, ptr %result, align 128 + ret void +} + +define void @test_lshr_i1024(ptr %result, ptr 
%input, i32 %shift) { +; SDAG-LABEL: test_lshr_i1024: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: sub sp, sp, #336 +; SDAG-NEXT: stp x28, x27, [sp, #256] ; 16-byte Folded Spill +; SDAG-NEXT: stp x26, x25, [sp, #272] ; 16-byte Folded Spill +; SDAG-NEXT: stp x24, x23, [sp, #288] ; 16-byte Folded Spill +; SDAG-NEXT: stp x22, x21, [sp, #304] ; 16-byte Folded Spill +; SDAG-NEXT: stp x20, x19, [sp, #320] ; 16-byte Folded Spill +; SDAG-NEXT: .cfi_def_cfa_offset 336 +; SDAG-NEXT: .cfi_offset w19, -8 +; SDAG-NEXT: .cfi_offset w20, -16 +; SDAG-NEXT: .cfi_offset w21, -24 +; SDAG-NEXT: .cfi_offset w22, -32 +; SDAG-NEXT: .cfi_offset w23, -40 +; SDAG-NEXT: .cfi_offset w24, -48 +; SDAG-NEXT: .cfi_offset w25, -56 +; SDAG-NEXT: .cfi_offset w26, -64 +; SDAG-NEXT: .cfi_offset w27, -72 +; SDAG-NEXT: .cfi_offset w28, -80 +; SDAG-NEXT: ldp x8, x9, [x1, #112] +; SDAG-NEXT: movi.2d v0, #0000000000000000 +; SDAG-NEXT: ldp q1, q2, [x1] +; SDAG-NEXT: mov x10, sp +; SDAG-NEXT: ldp q3, q4, [x1, #32] +; SDAG-NEXT: ldr q7, [x1, #96] +; SDAG-NEXT: ldp q5, q6, [x1, #64] +; SDAG-NEXT: mvn w1, w2 +; SDAG-NEXT: stp x8, x9, [sp, #112] +; SDAG-NEXT: mov w8, w2 +; SDAG-NEXT: lsr x9, x8, #3 +; SDAG-NEXT: stp q2, q3, [sp, #16] +; SDAG-NEXT: and x14, x8, #0x3f +; SDAG-NEXT: stp q4, q5, [sp, #48] +; SDAG-NEXT: eor x15, x14, #0x3f +; SDAG-NEXT: and x9, x9, #0x78 +; SDAG-NEXT: stp q6, q7, [sp, #80] +; SDAG-NEXT: stp q0, q0, [sp, #128] +; SDAG-NEXT: add x10, x10, x9 +; SDAG-NEXT: stp q0, q0, [sp, #160] +; SDAG-NEXT: stp q0, q0, [sp, #192] +; SDAG-NEXT: stp q0, q0, [sp, #224] +; SDAG-NEXT: str q1, [sp] +; SDAG-NEXT: ldp x11, x9, [x10, #16] +; SDAG-NEXT: ldr x16, [x10, #32] +; SDAG-NEXT: ldp x12, x13, [x10, #40] +; SDAG-NEXT: ldr x3, [x10, #56] +; SDAG-NEXT: ldp x4, x6, [x10, #64] +; SDAG-NEXT: lsl x2, x16, #1 +; SDAG-NEXT: lsl x17, x11, #1 +; SDAG-NEXT: ldp x24, x26, [x10, #112] +; SDAG-NEXT: lsl x5, x13, #1 +; SDAG-NEXT: lsr x13, x13, x8 +; SDAG-NEXT: lsr x11, x11, x8 +; SDAG-NEXT: lsl x14, x17, x1 +; SDAG-NEXT: 
lsl x7, x6, #1 +; SDAG-NEXT: lsl x17, x2, x1 +; SDAG-NEXT: lsl x2, x5, x1 +; SDAG-NEXT: ldp x5, x22, [x10, #80] +; SDAG-NEXT: lsr x19, x4, x8 +; SDAG-NEXT: lsl x7, x7, x15 +; SDAG-NEXT: lsl x21, x4, #1 +; SDAG-NEXT: lsr x6, x6, x8 +; SDAG-NEXT: lsl x27, x24, #1 +; SDAG-NEXT: lsr x24, x24, x8 +; SDAG-NEXT: lsl x23, x5, #1 +; SDAG-NEXT: orr x4, x7, x19 +; SDAG-NEXT: lsr x25, x22, x8 +; SDAG-NEXT: ldp x20, x7, [x10, #96] +; SDAG-NEXT: lsl x21, x21, x1 +; SDAG-NEXT: lsl x23, x23, x1 +; SDAG-NEXT: lsl x22, x22, #1 +; SDAG-NEXT: lsr x5, x5, x8 +; SDAG-NEXT: lsr x16, x16, x8 +; SDAG-NEXT: lsl x19, x20, #1 +; SDAG-NEXT: orr x6, x6, x23 +; SDAG-NEXT: lsl x23, x7, #1 +; SDAG-NEXT: lsr x20, x20, x8 +; SDAG-NEXT: lsr x7, x7, x8 +; SDAG-NEXT: lsl x22, x22, x15 +; SDAG-NEXT: lsl x19, x19, x1 +; SDAG-NEXT: lsl x1, x27, x1 +; SDAG-NEXT: lsl x23, x23, x15 +; SDAG-NEXT: orr x5, x22, x5 +; SDAG-NEXT: ldp x28, x27, [sp, #256] ; 16-byte Folded Reload +; SDAG-NEXT: orr x19, x25, x19 +; SDAG-NEXT: lsl x25, x26, #1 +; SDAG-NEXT: orr x20, x23, x20 +; SDAG-NEXT: orr x1, x7, x1 +; SDAG-NEXT: ldp x23, x10, [x10] +; SDAG-NEXT: stp x20, x1, [x0, #96] +; SDAG-NEXT: lsl x20, x3, #1 +; SDAG-NEXT: lsl x25, x25, x15 +; SDAG-NEXT: lsr x26, x26, x8 +; SDAG-NEXT: stp x5, x19, [x0, #80] +; SDAG-NEXT: lsr x3, x3, x8 +; SDAG-NEXT: lsl x19, x20, x15 +; SDAG-NEXT: orr x7, x25, x24 +; SDAG-NEXT: lsl x1, x9, #1 +; SDAG-NEXT: stp x7, x26, [x0, #112] +; SDAG-NEXT: lsl x7, x10, #1 +; SDAG-NEXT: orr x3, x3, x21 +; SDAG-NEXT: orr x13, x19, x13 +; SDAG-NEXT: lsl x5, x12, #1 +; SDAG-NEXT: lsr x9, x9, x8 +; SDAG-NEXT: stp x13, x3, [x0, #48] +; SDAG-NEXT: lsl x13, x1, x15 +; SDAG-NEXT: lsr x23, x23, x8 +; SDAG-NEXT: lsr x12, x12, x8 +; SDAG-NEXT: lsr x8, x10, x8 +; SDAG-NEXT: lsl x10, x7, x15 +; SDAG-NEXT: stp x4, x6, [x0, #64] +; SDAG-NEXT: lsl x4, x5, x15 +; SDAG-NEXT: orr x9, x9, x17 +; SDAG-NEXT: orr x11, x13, x11 +; SDAG-NEXT: ldp x20, x19, [sp, #320] ; 16-byte Folded Reload +; SDAG-NEXT: stp x11, x9, [x0, #16] 
+; SDAG-NEXT: orr x9, x10, x23 +; SDAG-NEXT: orr x12, x12, x2 +; SDAG-NEXT: ldp x22, x21, [sp, #304] ; 16-byte Folded Reload +; SDAG-NEXT: orr x16, x4, x16 +; SDAG-NEXT: ldp x24, x23, [sp, #288] ; 16-byte Folded Reload +; SDAG-NEXT: orr x8, x8, x14 +; SDAG-NEXT: ldp x26, x25, [sp, #272] ; 16-byte Folded Reload +; SDAG-NEXT: stp x16, x12, [x0, #32] +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: add sp, sp, #336 +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i1024: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: sub sp, sp, #416 +; GISEL-NEXT: stp x28, x27, [sp, #320] ; 16-byte Folded Spill +; GISEL-NEXT: stp x26, x25, [sp, #336] ; 16-byte Folded Spill +; GISEL-NEXT: stp x24, x23, [sp, #352] ; 16-byte Folded Spill +; GISEL-NEXT: stp x22, x21, [sp, #368] ; 16-byte Folded Spill +; GISEL-NEXT: stp x20, x19, [sp, #384] ; 16-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #400] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 416 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset w19, -24 +; GISEL-NEXT: .cfi_offset w20, -32 +; GISEL-NEXT: .cfi_offset w21, -40 +; GISEL-NEXT: .cfi_offset w22, -48 +; GISEL-NEXT: .cfi_offset w23, -56 +; GISEL-NEXT: .cfi_offset w24, -64 +; GISEL-NEXT: .cfi_offset w25, -72 +; GISEL-NEXT: .cfi_offset w26, -80 +; GISEL-NEXT: .cfi_offset w27, -88 +; GISEL-NEXT: .cfi_offset w28, -96 +; GISEL-NEXT: mov w8, w2 +; GISEL-NEXT: ldp x20, x16, [x1] +; GISEL-NEXT: mov w9, #64 ; =0x40 +; GISEL-NEXT: and x14, x8, #0x3f +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: sub x15, x9, x14 +; GISEL-NEXT: ldp x12, x13, [x1, #16] +; GISEL-NEXT: lsl x10, x16, x15 +; GISEL-NEXT: lsr x9, x8, #6 +; GISEL-NEXT: lsr x11, x20, x14 +; GISEL-NEXT: lsr x19, x16, x14 +; GISEL-NEXT: str x16, [sp, #264] ; 8-byte Folded Spill +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: lsl x22, x12, x15 +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x10, x11, x10 +; GISEL-NEXT: str x12, [sp, #240] ; 8-byte Folded Spill +; GISEL-NEXT: lsr x26, 
x12, x14 +; GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsl x24, x13, x15 +; GISEL-NEXT: csel x11, xzr, x22, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: lsr x5, x13, x14 +; GISEL-NEXT: orr x11, x19, x11 +; GISEL-NEXT: ldp x12, x16, [x1, #32] +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x0, [sp, #296] ; 8-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x24, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: str x13, [sp, #216] ; 8-byte Folded Spill +; GISEL-NEXT: lsl x23, x12, x15 +; GISEL-NEXT: orr x11, x26, x11 +; GISEL-NEXT: stp x12, x16, [sp, #176] ; 16-byte Folded Spill +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x17, x12, x14 +; GISEL-NEXT: csel x11, xzr, x23, eq +; GISEL-NEXT: lsl x0, x16, x15 +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x11, x5, x11 +; GISEL-NEXT: ldp x13, x12, [x1, #48] +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x7, x16, x14 +; GISEL-NEXT: csel x11, xzr, x0, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: stp x17, x0, [sp, #152] ; 16-byte Folded Spill +; GISEL-NEXT: lsl x2, x13, x15 +; GISEL-NEXT: orr x11, x17, x11 +; GISEL-NEXT: stp x13, x12, [sp, #192] ; 16-byte Folded Spill +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x13, x13, x14 +; GISEL-NEXT: csel x11, xzr, x2, eq +; GISEL-NEXT: lsl x0, x12, x15 +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: stp x13, x2, [sp, #136] ; 16-byte Folded Spill +; GISEL-NEXT: orr x11, x7, x11 +; GISEL-NEXT: lsr x12, x12, x14 +; GISEL-NEXT: ldp x4, x2, [x1, #64] +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x17, [sp, #144] ; 8-byte Folded Reload +; GISEL-NEXT: stp x5, x23, [sp, #24] ; 16-byte Folded Spill +; GISEL-NEXT: csel x11, xzr, x0, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: lsl x3, x4, x15 +; GISEL-NEXT: orr x11, x13, x11 +; GISEL-NEXT: str x4, [sp, #208] ; 8-byte Folded Spill +; 
GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsl x6, x2, x15 +; GISEL-NEXT: csel x11, xzr, x3, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: lsr x13, x4, x14 +; GISEL-NEXT: orr x11, x12, x11 +; GISEL-NEXT: str x2, [sp, #224] ; 8-byte Folded Spill +; GISEL-NEXT: csel x16, x11, x10, eq +; GISEL-NEXT: ldp x10, x4, [x1, #80] +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: mov x11, x2 +; GISEL-NEXT: stp x13, x12, [sp, #120] ; 16-byte Folded Spill +; GISEL-NEXT: csel x2, xzr, x6, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: lsr x11, x11, x14 +; GISEL-NEXT: orr x2, x13, x2 +; GISEL-NEXT: lsl x12, x10, x15 +; GISEL-NEXT: str x10, [sp, #232] ; 8-byte Folded Spill +; GISEL-NEXT: csel x16, x2, x16, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x10, x10, x14 +; GISEL-NEXT: csel x2, xzr, x12, eq +; GISEL-NEXT: str x12, [sp, #312] ; 8-byte Folded Spill +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x2, x11, x2 +; GISEL-NEXT: lsl x12, x4, x15 +; GISEL-NEXT: str x10, [sp, #304] ; 8-byte Folded Spill +; GISEL-NEXT: csel x16, x2, x16, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x13, x4, x14 +; GISEL-NEXT: stp x12, x11, [sp, #104] ; 16-byte Folded Spill +; GISEL-NEXT: ldr x11, [x1, #96] +; GISEL-NEXT: csel x2, xzr, x12, eq +; GISEL-NEXT: orr x2, x10, x2 +; GISEL-NEXT: ldp x10, x30, [x1, #104] +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: lsl x28, x11, x15 +; GISEL-NEXT: stp x4, x11, [sp, #248] ; 16-byte Folded Spill +; GISEL-NEXT: csel x16, x2, x16, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x3, [sp, #16] ; 8-byte Folded Spill +; GISEL-NEXT: csel x2, xzr, x28, eq +; GISEL-NEXT: lsl x12, x10, x15 +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: orr x2, x13, x2 +; GISEL-NEXT: lsl x21, x30, x15 +; GISEL-NEXT: stp x10, x30, [sp, #272] ; 16-byte Folded Spill +; GISEL-NEXT: csel x16, x2, x16, eq +; GISEL-NEXT: stp x12, x13, [sp, #88] ; 16-byte Folded Spill +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x13, x11, x14 +; GISEL-NEXT: csel x2, xzr, x12, eq +; 
GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: ldr x11, [x1, #120] +; GISEL-NEXT: lsr x10, x10, x14 +; GISEL-NEXT: lsr x27, x30, x14 +; GISEL-NEXT: orr x4, x13, x2 +; GISEL-NEXT: mov x12, x23 +; GISEL-NEXT: str x28, [sp, #48] ; 8-byte Folded Spill +; GISEL-NEXT: csel x16, x4, x16, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsl x25, x11, x15 +; GISEL-NEXT: csel x1, xzr, x21, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: stp x10, x13, [sp, #72] ; 16-byte Folded Spill +; GISEL-NEXT: orr x1, x10, x1 +; GISEL-NEXT: lsr x10, x11, x14 +; GISEL-NEXT: str x11, [sp, #288] ; 8-byte Folded Spill +; GISEL-NEXT: csel x1, x1, x16, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x21, [sp, #40] ; 8-byte Folded Spill +; GISEL-NEXT: csel x30, xzr, x25, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: stp x27, x10, [sp, #56] ; 16-byte Folded Spill +; GISEL-NEXT: orr x30, x27, x30 +; GISEL-NEXT: ldp x11, x13, [sp, #152] ; 16-byte Folded Reload +; GISEL-NEXT: csel x1, x30, x1, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: mov x30, x7 +; GISEL-NEXT: csel x1, x10, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x20, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x22, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: str x10, [sp, #168] ; 8-byte Folded Spill +; GISEL-NEXT: orr x1, x19, x1 +; GISEL-NEXT: ldp x20, x14, [sp, #112] ; 16-byte Folded Reload +; GISEL-NEXT: csel x1, x1, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x24, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x19, x26, x19 +; GISEL-NEXT: ldp x10, x15, [sp, #304] ; 16-byte Folded Reload +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x23, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x19, x5, x19 +; GISEL-NEXT: ldp x16, x22, [sp, #96] ; 16-byte Folded Reload +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x13, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x19, x11, x19 +; GISEL-NEXT: ldp x4, 
x2, [sp, #80] ; 16-byte Folded Reload +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x17, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x19, x7, x19 +; GISEL-NEXT: mov x7, x0 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x0, eq +; GISEL-NEXT: ldp x23, x0, [sp, #128] ; 16-byte Folded Reload +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x19, x0, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x3, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x19, x23, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x6, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x19, x14, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x15, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x19, x20, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x22, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x19, x10, x19 +; GISEL-NEXT: ldr x10, [sp, #264] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x28, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: orr x19, x16, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x2, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: orr x19, x4, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x21, eq +; GISEL-NEXT: ldp x28, x21, [sp, #64] ; 16-byte Folded Reload +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: orr x19, x21, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x25, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: orr x19, x27, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x1, x28, x1, eq +; GISEL-NEXT: cmp x9, #15 +; 
GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x10, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x24, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: mov x24, x11 +; GISEL-NEXT: orr x1, x26, x1 +; GISEL-NEXT: str x10, [sp, #264] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x10, [sp, #240] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x1, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: mov x26, x13 +; GISEL-NEXT: csel x19, xzr, x12, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: mov x12, x30 +; GISEL-NEXT: orr x19, x5, x19 +; GISEL-NEXT: mov x5, x15 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x13, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: mov x13, x23 +; GISEL-NEXT: orr x19, x11, x19 +; GISEL-NEXT: mov x11, x17 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x17, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: mov x17, x20 +; GISEL-NEXT: orr x19, x30, x19 +; GISEL-NEXT: mov x30, x7 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: mov x27, x30 +; GISEL-NEXT: csel x19, xzr, x7, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: mov x7, x14 +; GISEL-NEXT: orr x19, x0, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x3, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: mov x3, x22 +; GISEL-NEXT: orr x19, x23, x19 +; GISEL-NEXT: ldr x23, [sp, #16] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x6, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x19, x14, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x15, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: ldr x15, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: orr x19, x20, x19 +; GISEL-NEXT: ldp x14, x20, [sp, #40] ; 16-byte Folded Reload +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst 
x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x22, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: ldr x22, [sp, #56] ; 8-byte Folded Reload +; GISEL-NEXT: orr x19, x15, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x20, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x19, x16, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x2, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: orr x19, x4, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x14, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: orr x19, x21, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x25, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: orr x19, x22, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x1, x28, x1, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x10, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x10, [sp, #240] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x10, [sp, #32] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, xzr, x10, eq +; GISEL-NEXT: ldr x10, [sp, #24] ; 8-byte Folded Reload +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x1, x10, x1 +; GISEL-NEXT: ldr x10, [sp, #216] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x1, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x26, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x19, x24, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x11, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x19, x12, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x30, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: mov x30, x0 +; GISEL-NEXT: orr x19, x0, x19 +; GISEL-NEXT: mov x0, x13 +; 
GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x23, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x19, x13, x19 +; GISEL-NEXT: mov x13, x3 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x6, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x19, x7, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x5, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: mov x5, x17 +; GISEL-NEXT: orr x19, x17, x19 +; GISEL-NEXT: mov x17, x22 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x3, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: mov x3, x20 +; GISEL-NEXT: orr x19, x15, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x20, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: mov x20, x14 +; GISEL-NEXT: orr x19, x16, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x2, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x19, x4, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x14, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: mov x14, x13 +; GISEL-NEXT: orr x19, x21, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x25, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: orr x19, x22, x19 +; GISEL-NEXT: mov x22, x30 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x1, x28, x1, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x10, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x26, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: str x10, [sp, #216] ; 8-byte Folded Spill +; GISEL-NEXT: 
orr x1, x24, x1 +; GISEL-NEXT: ldr x10, [sp, #176] ; 8-byte Folded Reload +; GISEL-NEXT: mov x24, x3 +; GISEL-NEXT: csel x1, x1, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: mov x26, x5 +; GISEL-NEXT: csel x19, xzr, x11, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x19, x12, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x27, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x19, x30, x19 +; GISEL-NEXT: ldr x30, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x23, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x19, x0, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x6, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x19, x7, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x30, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x19, x5, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x13, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x19, x15, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x3, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x19, x16, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x2, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x19, x4, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x20, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x19, x21, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x19, xzr, x25, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: orr x19, x17, x19 +; GISEL-NEXT: csel x1, x19, x1, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x1, x28, x1, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #13 +; 
GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x19, x10, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x10, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, xzr, x11, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: mov x11, x23 +; GISEL-NEXT: orr x1, x12, x1 +; GISEL-NEXT: mov x12, x0 +; GISEL-NEXT: csel x1, x1, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x27, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x3, x22, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x23, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: mov x23, x17 +; GISEL-NEXT: orr x3, x0, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x6, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x3, x7, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x30, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x3, x26, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x13, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x3, x15, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x24, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x3, x16, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x2, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x3, x4, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x20, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x3, x21, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x25, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: orr x3, x17, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: mov x3, x4 +; 
GISEL-NEXT: csel x1, x28, x1, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x10, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x17, xzr, x27, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: str x10, [sp, #184] ; 8-byte Folded Spill +; GISEL-NEXT: orr x17, x22, x17 +; GISEL-NEXT: ldr x10, [sp, #192] ; 8-byte Folded Reload +; GISEL-NEXT: csel x17, x17, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x11, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x0, x12, x0 +; GISEL-NEXT: csel x17, x0, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x6, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x0, x7, x0 +; GISEL-NEXT: csel x17, x0, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x30, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x0, x26, x0 +; GISEL-NEXT: csel x17, x0, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x13, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x0, x15, x0 +; GISEL-NEXT: csel x17, x0, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x24, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x0, x16, x0 +; GISEL-NEXT: csel x17, x0, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x2, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x0, x4, x0 +; GISEL-NEXT: csel x17, x0, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x20, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x0, x21, x0 +; GISEL-NEXT: csel x17, x0, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x25, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: orr x0, x23, x0 +; GISEL-NEXT: csel x17, x0, x17, eq +; GISEL-NEXT: cmp x9, #9 +; 
GISEL-NEXT: csel x17, x28, x17, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x17, xzr, x17, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x17, x10, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x10, [sp, #200] ; 8-byte Folded Reload +; GISEL-NEXT: csel x13, xzr, x11, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x12, x12, x13 +; GISEL-NEXT: csel x12, x12, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x6, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x13, x7, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x30, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x13, x26, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x14, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x13, x15, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x24, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x13, x16, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x2, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x13, x4, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x20, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x13, x21, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x25, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: orr x13, x23, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x12, x28, x12, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: 
cmp x9, #10 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x12, xzr, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x12, x10, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x6, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x10, x7, x11 +; GISEL-NEXT: csel x10, x10, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x30, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x11, x26, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x14, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x11, x15, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x24, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x11, x16, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x2, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x11, x4, x11 +; GISEL-NEXT: ldr x4, [sp, #168] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x20, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x11, x21, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x25, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: orr x11, x23, x11 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: ldr x11, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: csel x10, x28, x10, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: 
csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x10, x11, x10, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x11, xzr, x30, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x11, x26, x11 +; GISEL-NEXT: ldp x29, x30, [sp, #400] ; 16-byte Folded Reload +; GISEL-NEXT: csel x11, x11, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x14, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x13, x15, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x24, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x13, x16, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x2, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x13, x3, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x20, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x13, x21, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x25, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: orr x13, x23, x13 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: ldr x13, [sp, #224] ; 8-byte Folded Reload +; GISEL-NEXT: csel x11, x28, x11, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #14 +; 
GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x11, xzr, x11, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x11, x13, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x14, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x13, x15, x13 +; GISEL-NEXT: csel x13, x13, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x24, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x0, x16, x0 +; GISEL-NEXT: csel x13, x0, x13, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x2, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x0, x3, x0 +; GISEL-NEXT: csel x13, x0, x13, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x20, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x0, x21, x0 +; GISEL-NEXT: csel x13, x0, x13, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x25, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: orr x0, x23, x0 +; GISEL-NEXT: csel x13, x0, x13, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: ldr x0, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: csel x13, x28, x13, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x13, xzr, x13, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x13, x0, x13, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x24, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x0, x16, x0 +; GISEL-NEXT: ldr x16, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: csel 
x0, x0, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x2, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x1, x3, x1 +; GISEL-NEXT: csel x0, x1, x0, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x20, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x1, x21, x1 +; GISEL-NEXT: csel x0, x1, x0, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x25, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: orr x1, x23, x1 +; GISEL-NEXT: csel x0, x1, x0, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: ldr x1, [sp, #248] ; 8-byte Folded Reload +; GISEL-NEXT: csel x0, x28, x0, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x0, xzr, x0, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x0, x1, x0, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x2, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x1, x3, x1 +; GISEL-NEXT: csel x1, x1, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x20, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x3, x21, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x25, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: orr x3, x23, x3 +; GISEL-NEXT: csel x1, x3, x1, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: ldr x3, [sp, #256] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x28, x1, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x1, 
xzr, x1, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x3, x3, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x1, xzr, x20, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x1, x21, x1 +; GISEL-NEXT: ldp x22, x21, [sp, #368] ; 16-byte Folded Reload +; GISEL-NEXT: csel x1, x1, xzr, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x2, xzr, x25, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: orr x2, x23, x2 +; GISEL-NEXT: csel x1, x2, x1, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: ldr x2, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, x28, x1, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x1, 
xzr, x1, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x1, xzr, x1, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x2, x2, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x1, [sp, #264] ; 8-byte Folded Reload +; GISEL-NEXT: csel x15, xzr, x25, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: orr x15, x23, x15 +; GISEL-NEXT: ldp x24, x23, [sp, #352] ; 16-byte Folded Reload +; GISEL-NEXT: csel x15, x15, xzr, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: csel x15, x28, x15, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x15, xzr, x15, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x15, x16, x15, eq +; GISEL-NEXT: cmp x9, #0 +; GISEL-NEXT: ldr x16, [sp, #296] ; 8-byte Folded Reload +; GISEL-NEXT: csel x14, x28, xzr, eq +; GISEL-NEXT: cmp x9, #1 +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #2 +; GISEL-NEXT: stp x17, x12, [x16, #48] +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #3 +; GISEL-NEXT: stp x10, x11, [x16, #64] +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #4 +; GISEL-NEXT: stp x4, x1, [x16] +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #5 +; GISEL-NEXT: ldr x4, [sp, 
#240] ; 8-byte Folded Reload +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #6 +; GISEL-NEXT: ldr x1, [sp, #216] ; 8-byte Folded Reload +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #7 +; GISEL-NEXT: stp x13, x0, [x16, #80] +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #8 +; GISEL-NEXT: stp x4, x1, [x16, #16] +; GISEL-NEXT: csel x14, xzr, x14, eq +; GISEL-NEXT: cmp x9, #9 +; GISEL-NEXT: ldr x1, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, xzr, x14, eq +; GISEL-NEXT: cmp x9, #10 +; GISEL-NEXT: stp x3, x2, [x16, #96] +; GISEL-NEXT: csel x10, xzr, x12, eq +; GISEL-NEXT: cmp x9, #11 +; GISEL-NEXT: stp x19, x1, [x16, #32] +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #12 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #13 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #14 +; GISEL-NEXT: csel x10, xzr, x10, eq +; GISEL-NEXT: cmp x9, #15 +; GISEL-NEXT: csel x9, xzr, x10, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: ldr x8, [sp, #288] ; 8-byte Folded Reload +; GISEL-NEXT: ldp x20, x19, [sp, #384] ; 16-byte Folded Reload +; GISEL-NEXT: ldp x26, x25, [sp, #336] ; 16-byte Folded Reload +; GISEL-NEXT: csel x8, x8, x9, eq +; GISEL-NEXT: ldp x28, x27, [sp, #320] ; 16-byte Folded Reload +; GISEL-NEXT: stp x15, x8, [x16, #112] +; GISEL-NEXT: add sp, sp, #416 +; GISEL-NEXT: ret +entry: + %input_val = load i1024, ptr %input, align 128 + %shift_ext = zext i32 %shift to i1024 + %shifted = lshr i1024 %input_val, %shift_ext + store i1024 %shifted, ptr %result, align 128 + ret void +} + +define void @test_ashr_i1024(ptr %result, ptr %input, i32 %shift) { +; SDAG-LABEL: test_ashr_i1024: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: sub sp, sp, #336 +; SDAG-NEXT: stp x28, x27, [sp, #256] ; 16-byte Folded Spill +; SDAG-NEXT: stp x26, x25, [sp, #272] ; 16-byte Folded Spill +; SDAG-NEXT: stp x24, x23, [sp, #288] ; 16-byte Folded Spill +; SDAG-NEXT: stp x22, x21, [sp, #304] ; 16-byte Folded Spill +; 
SDAG-NEXT: stp x20, x19, [sp, #320] ; 16-byte Folded Spill +; SDAG-NEXT: .cfi_def_cfa_offset 336 +; SDAG-NEXT: .cfi_offset w19, -8 +; SDAG-NEXT: .cfi_offset w20, -16 +; SDAG-NEXT: .cfi_offset w21, -24 +; SDAG-NEXT: .cfi_offset w22, -32 +; SDAG-NEXT: .cfi_offset w23, -40 +; SDAG-NEXT: .cfi_offset w24, -48 +; SDAG-NEXT: .cfi_offset w25, -56 +; SDAG-NEXT: .cfi_offset w26, -64 +; SDAG-NEXT: .cfi_offset w27, -72 +; SDAG-NEXT: .cfi_offset w28, -80 +; SDAG-NEXT: ldp x8, x9, [x1, #112] +; SDAG-NEXT: mov x11, sp +; SDAG-NEXT: ldp q0, q1, [x1] +; SDAG-NEXT: ldr q6, [x1, #96] +; SDAG-NEXT: ldp q2, q3, [x1, #32] +; SDAG-NEXT: ldp q4, q5, [x1, #64] +; SDAG-NEXT: mvn w1, w2 +; SDAG-NEXT: stp x8, x9, [sp, #112] +; SDAG-NEXT: mov w8, w2 +; SDAG-NEXT: asr x9, x9, #63 +; SDAG-NEXT: lsr x10, x8, #3 +; SDAG-NEXT: stp q1, q2, [sp, #16] +; SDAG-NEXT: and x14, x8, #0x3f +; SDAG-NEXT: stp q3, q4, [sp, #48] +; SDAG-NEXT: eor x15, x14, #0x3f +; SDAG-NEXT: and x10, x10, #0x78 +; SDAG-NEXT: stp q5, q6, [sp, #80] +; SDAG-NEXT: str q0, [sp] +; SDAG-NEXT: add x10, x11, x10 +; SDAG-NEXT: stp x9, x9, [sp, #240] +; SDAG-NEXT: stp x9, x9, [sp, #224] +; SDAG-NEXT: stp x9, x9, [sp, #208] +; SDAG-NEXT: stp x9, x9, [sp, #192] +; SDAG-NEXT: stp x9, x9, [sp, #176] +; SDAG-NEXT: stp x9, x9, [sp, #160] +; SDAG-NEXT: stp x9, x9, [sp, #144] +; SDAG-NEXT: stp x9, x9, [sp, #128] +; SDAG-NEXT: ldp x11, x9, [x10, #16] +; SDAG-NEXT: ldr x16, [x10, #32] +; SDAG-NEXT: ldp x12, x13, [x10, #40] +; SDAG-NEXT: ldr x3, [x10, #56] +; SDAG-NEXT: ldp x4, x6, [x10, #64] +; SDAG-NEXT: lsl x2, x16, #1 +; SDAG-NEXT: lsl x17, x11, #1 +; SDAG-NEXT: ldp x24, x26, [x10, #112] +; SDAG-NEXT: lsl x5, x13, #1 +; SDAG-NEXT: lsr x13, x13, x8 +; SDAG-NEXT: lsr x11, x11, x8 +; SDAG-NEXT: lsl x14, x17, x1 +; SDAG-NEXT: lsl x7, x6, #1 +; SDAG-NEXT: lsl x17, x2, x1 +; SDAG-NEXT: lsl x2, x5, x1 +; SDAG-NEXT: ldp x5, x22, [x10, #80] +; SDAG-NEXT: lsr x19, x4, x8 +; SDAG-NEXT: lsl x7, x7, x15 +; SDAG-NEXT: lsl x21, x4, #1 +; SDAG-NEXT: lsr x6, 
x6, x8 +; SDAG-NEXT: lsl x27, x24, #1 +; SDAG-NEXT: lsr x24, x24, x8 +; SDAG-NEXT: lsl x23, x5, #1 +; SDAG-NEXT: orr x4, x7, x19 +; SDAG-NEXT: lsr x25, x22, x8 +; SDAG-NEXT: ldp x20, x7, [x10, #96] +; SDAG-NEXT: lsl x21, x21, x1 +; SDAG-NEXT: lsl x23, x23, x1 +; SDAG-NEXT: lsl x22, x22, #1 +; SDAG-NEXT: lsr x5, x5, x8 +; SDAG-NEXT: lsr x16, x16, x8 +; SDAG-NEXT: lsl x19, x20, #1 +; SDAG-NEXT: orr x6, x6, x23 +; SDAG-NEXT: lsl x23, x7, #1 +; SDAG-NEXT: lsr x20, x20, x8 +; SDAG-NEXT: lsr x7, x7, x8 +; SDAG-NEXT: lsl x22, x22, x15 +; SDAG-NEXT: lsl x19, x19, x1 +; SDAG-NEXT: lsl x1, x27, x1 +; SDAG-NEXT: lsl x23, x23, x15 +; SDAG-NEXT: orr x5, x22, x5 +; SDAG-NEXT: ldp x28, x27, [sp, #256] ; 16-byte Folded Reload +; SDAG-NEXT: orr x19, x25, x19 +; SDAG-NEXT: lsl x25, x26, #1 +; SDAG-NEXT: orr x20, x23, x20 +; SDAG-NEXT: orr x1, x7, x1 +; SDAG-NEXT: ldp x23, x10, [x10] +; SDAG-NEXT: stp x20, x1, [x0, #96] +; SDAG-NEXT: lsl x20, x3, #1 +; SDAG-NEXT: lsl x25, x25, x15 +; SDAG-NEXT: asr x26, x26, x8 +; SDAG-NEXT: stp x5, x19, [x0, #80] +; SDAG-NEXT: lsr x3, x3, x8 +; SDAG-NEXT: lsl x19, x20, x15 +; SDAG-NEXT: orr x7, x25, x24 +; SDAG-NEXT: lsl x1, x9, #1 +; SDAG-NEXT: stp x7, x26, [x0, #112] +; SDAG-NEXT: lsl x7, x10, #1 +; SDAG-NEXT: orr x3, x3, x21 +; SDAG-NEXT: orr x13, x19, x13 +; SDAG-NEXT: lsl x5, x12, #1 +; SDAG-NEXT: lsr x9, x9, x8 +; SDAG-NEXT: stp x13, x3, [x0, #48] +; SDAG-NEXT: lsl x13, x1, x15 +; SDAG-NEXT: lsr x23, x23, x8 +; SDAG-NEXT: lsr x12, x12, x8 +; SDAG-NEXT: lsr x8, x10, x8 +; SDAG-NEXT: lsl x10, x7, x15 +; SDAG-NEXT: stp x4, x6, [x0, #64] +; SDAG-NEXT: lsl x4, x5, x15 +; SDAG-NEXT: orr x9, x9, x17 +; SDAG-NEXT: orr x11, x13, x11 +; SDAG-NEXT: ldp x20, x19, [sp, #320] ; 16-byte Folded Reload +; SDAG-NEXT: stp x11, x9, [x0, #16] +; SDAG-NEXT: orr x9, x10, x23 +; SDAG-NEXT: orr x12, x12, x2 +; SDAG-NEXT: ldp x22, x21, [sp, #304] ; 16-byte Folded Reload +; SDAG-NEXT: orr x16, x4, x16 +; SDAG-NEXT: ldp x24, x23, [sp, #288] ; 16-byte Folded Reload +; 
SDAG-NEXT: orr x8, x8, x14 +; SDAG-NEXT: ldp x26, x25, [sp, #272] ; 16-byte Folded Reload +; SDAG-NEXT: stp x16, x12, [x0, #32] +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: add sp, sp, #336 +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i1024: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: sub sp, sp, #432 +; GISEL-NEXT: stp x28, x27, [sp, #336] ; 16-byte Folded Spill +; GISEL-NEXT: stp x26, x25, [sp, #352] ; 16-byte Folded Spill +; GISEL-NEXT: stp x24, x23, [sp, #368] ; 16-byte Folded Spill +; GISEL-NEXT: stp x22, x21, [sp, #384] ; 16-byte Folded Spill +; GISEL-NEXT: stp x20, x19, [sp, #400] ; 16-byte Folded Spill +; GISEL-NEXT: stp x29, x30, [sp, #416] ; 16-byte Folded Spill +; GISEL-NEXT: .cfi_def_cfa_offset 432 +; GISEL-NEXT: .cfi_offset w30, -8 +; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: .cfi_offset w19, -24 +; GISEL-NEXT: .cfi_offset w20, -32 +; GISEL-NEXT: .cfi_offset w21, -40 +; GISEL-NEXT: .cfi_offset w22, -48 +; GISEL-NEXT: .cfi_offset w23, -56 +; GISEL-NEXT: .cfi_offset w24, -64 +; GISEL-NEXT: .cfi_offset w25, -72 +; GISEL-NEXT: .cfi_offset w26, -80 +; GISEL-NEXT: .cfi_offset w27, -88 +; GISEL-NEXT: .cfi_offset w28, -96 +; GISEL-NEXT: str x0, [sp, #264] ; 8-byte Folded Spill +; GISEL-NEXT: mov w8, w2 +; GISEL-NEXT: mov w9, #64 ; =0x40 +; GISEL-NEXT: ldp x7, x0, [x1] +; GISEL-NEXT: and x15, x8, #0x3f +; GISEL-NEXT: sub x14, x9, x15 +; GISEL-NEXT: ldr x28, [x1, #120] +; GISEL-NEXT: lsr x10, x8, #6 +; GISEL-NEXT: ldp x17, x16, [x1, #16] +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsl x9, x0, x14 +; GISEL-NEXT: lsr x12, x7, x15 +; GISEL-NEXT: asr x11, x28, #63 +; GISEL-NEXT: lsr x20, x0, x15 +; GISEL-NEXT: str x0, [sp, #232] ; 8-byte Folded Spill +; GISEL-NEXT: lsl x27, x28, x14 +; GISEL-NEXT: csel x9, xzr, x9, eq +; GISEL-NEXT: lsl x19, x17, x14 +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x12, x9 +; GISEL-NEXT: str x17, [sp, #208] ; 8-byte Folded Spill +; GISEL-NEXT: lsr x2, x17, x15 +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f 
+; GISEL-NEXT: lsl x30, x16, x14 +; GISEL-NEXT: csel x12, xzr, x19, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: lsr x25, x16, x15 +; GISEL-NEXT: orr x12, x20, x12 +; GISEL-NEXT: ldp x13, x17, [x1, #32] +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x16, [sp, #184] ; 8-byte Folded Spill +; GISEL-NEXT: csel x12, xzr, x30, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: str x2, [sp, #88] ; 8-byte Folded Spill +; GISEL-NEXT: lsl x24, x13, x14 +; GISEL-NEXT: orr x12, x2, x12 +; GISEL-NEXT: str x13, [sp, #168] ; 8-byte Folded Spill +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x21, x13, x15 +; GISEL-NEXT: csel x12, xzr, x24, eq +; GISEL-NEXT: lsl x0, x17, x14 +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x12, x25, x12 +; GISEL-NEXT: ldp x16, x13, [x1, #48] +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x3, x17, x15 +; GISEL-NEXT: csel x12, xzr, x0, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: str x0, [sp, #128] ; 8-byte Folded Spill +; GISEL-NEXT: lsl x2, x16, x14 +; GISEL-NEXT: orr x12, x21, x12 +; GISEL-NEXT: mov x0, x16 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x17, [sp, #144] ; 8-byte Folded Spill +; GISEL-NEXT: csel x12, xzr, x2, eq +; GISEL-NEXT: str x2, [sp, #304] ; 8-byte Folded Spill +; GISEL-NEXT: lsl x2, x13, x14 +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x12, x3, x12 +; GISEL-NEXT: ldr x17, [x1, #64] +; GISEL-NEXT: lsr x6, x0, x15 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x2, eq +; GISEL-NEXT: stp x16, x13, [sp, #152] ; 16-byte Folded Spill +; GISEL-NEXT: mov x16, x13 +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x12, x6, x12 +; GISEL-NEXT: lsl x0, x17, x14 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x16, x16, x15 +; GISEL-NEXT: ldr x13, [x1, #72] +; GISEL-NEXT: csel x12, xzr, x0, eq +; GISEL-NEXT: cmp x10, #7 +; 
GISEL-NEXT: orr x12, x16, x12 +; GISEL-NEXT: stp x16, x0, [sp, #288] ; 16-byte Folded Spill +; GISEL-NEXT: lsr x0, x17, x15 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: lsl x12, x13, x14 +; GISEL-NEXT: mov x16, x13 +; GISEL-NEXT: str x13, [sp, #192] ; 8-byte Folded Spill +; GISEL-NEXT: ldp x13, x5, [x1, #80] +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x17, [sp, #176] ; 8-byte Folded Spill +; GISEL-NEXT: csel x17, xzr, x12, eq +; GISEL-NEXT: str x0, [sp, #112] ; 8-byte Folded Spill +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: orr x17, x0, x17 +; GISEL-NEXT: lsl x0, x13, x14 +; GISEL-NEXT: str x12, [sp, #280] ; 8-byte Folded Spill +; GISEL-NEXT: csel x17, x17, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x9, x16, x15 +; GISEL-NEXT: csel x4, xzr, x0, eq +; GISEL-NEXT: str x13, [sp, #200] ; 8-byte Folded Spill +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: stp x9, x0, [sp, #96] ; 16-byte Folded Spill +; GISEL-NEXT: orr x4, x9, x4 +; GISEL-NEXT: lsl x23, x5, x14 +; GISEL-NEXT: lsr x12, x13, x15 +; GISEL-NEXT: ldp x9, x13, [x1, #96] +; GISEL-NEXT: csel x17, x4, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x28, [sp, #256] ; 8-byte Folded Spill +; GISEL-NEXT: csel x4, xzr, x23, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: str x3, [sp, #120] ; 8-byte Folded Spill +; GISEL-NEXT: orr x4, x12, x4 +; GISEL-NEXT: lsl x16, x9, x14 +; GISEL-NEXT: stp x5, x9, [sp, #216] ; 16-byte Folded Spill +; GISEL-NEXT: csel x17, x4, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsl x3, x11, x14 +; GISEL-NEXT: stp x16, x12, [sp, #72] ; 16-byte Folded Spill +; GISEL-NEXT: mov x12, x9 +; GISEL-NEXT: lsr x9, x5, x15 +; GISEL-NEXT: csel x4, xzr, x16, eq +; GISEL-NEXT: lsl x16, x13, x14 +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: orr x4, x9, x4 +; GISEL-NEXT: lsr x12, x12, x15 +; GISEL-NEXT: str x30, [sp, #48] ; 8-byte Folded Spill +; GISEL-NEXT: stp x16, x9, [sp, #56] ; 16-byte Folded Spill +; GISEL-NEXT: ldr x9, [x1, #112] +; GISEL-NEXT: csel x17, x4, x17, eq +; 
GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: stp x25, x24, [sp, #16] ; 16-byte Folded Spill +; GISEL-NEXT: ldr x5, [sp, #96] ; 8-byte Folded Reload +; GISEL-NEXT: csel x1, xzr, x16, eq +; GISEL-NEXT: lsl x16, x9, x14 +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: orr x1, x12, x1 +; GISEL-NEXT: stp x13, x9, [sp, #240] ; 16-byte Folded Spill +; GISEL-NEXT: stp x16, x12, [sp, #320] ; 16-byte Folded Spill +; GISEL-NEXT: mov x12, x9 +; GISEL-NEXT: csel x1, x1, x17, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x9, x13, x15 +; GISEL-NEXT: lsr x26, x12, x15 +; GISEL-NEXT: csel x17, xzr, x16, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: str x23, [sp, #272] ; 8-byte Folded Spill +; GISEL-NEXT: orr x13, x9, x17 +; GISEL-NEXT: str x9, [sp, #312] ; 8-byte Folded Spill +; GISEL-NEXT: mov x9, x28 +; GISEL-NEXT: csel x13, x13, x1, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: lsr x22, x9, x15 +; GISEL-NEXT: csel x28, xzr, x27, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: str x2, [sp, #8] ; 8-byte Folded Spill +; GISEL-NEXT: orr x28, x26, x28 +; GISEL-NEXT: ldp x0, x16, [sp, #120] ; 16-byte Folded Reload +; GISEL-NEXT: csel x12, x28, x13, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x13, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: csel x28, xzr, x3, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: stp x22, x3, [sp, #32] ; 16-byte Folded Spill +; GISEL-NEXT: orr x28, x22, x28 +; GISEL-NEXT: ldp x15, x14, [sp, #72] ; 16-byte Folded Reload +; GISEL-NEXT: csel x9, x28, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: mov x28, x24 +; GISEL-NEXT: csel x9, x7, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x7, [sp, #88] ; 8-byte Folded Reload +; GISEL-NEXT: str x9, [sp, #136] ; 8-byte Folded Spill +; GISEL-NEXT: csel x9, xzr, x19, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x20, x9 +; GISEL-NEXT: ldr x12, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x30, eq +; GISEL-NEXT: cmp x10, 
#1 +; GISEL-NEXT: mov x30, x25 +; GISEL-NEXT: orr x20, x7, x20 +; GISEL-NEXT: ldp x4, x19, [sp, #104] ; 16-byte Folded Reload +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x24, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: mov x24, x6 +; GISEL-NEXT: orr x20, x25, x20 +; GISEL-NEXT: mov x25, x21 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x16, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x20, x21, x20 +; GISEL-NEXT: ldp x1, x17, [sp, #56] ; 16-byte Folded Reload +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x13, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x20, x0, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x2, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x20, x6, x20 +; GISEL-NEXT: ldp x21, x6, [sp, #288] ; 16-byte Folded Reload +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x6, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x20, x21, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x12, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: orr x20, x19, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x4, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: orr x20, x5, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x23, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: ldr x23, [sp, #328] ; 8-byte Folded Reload +; GISEL-NEXT: orr x20, x14, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x15, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: orr x20, x17, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x1, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: orr x20, x23, x20 +; 
GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: ldp x23, x20, [sp, #312] ; 16-byte Folded Reload +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x20, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: orr x20, x23, x20 +; GISEL-NEXT: mov x23, x26 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x27, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: orr x20, x26, x20 +; GISEL-NEXT: ldr x26, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x3, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: ldr x3, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: orr x20, x22, x20 +; GISEL-NEXT: mov x22, x23 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: mov x3, x16 +; GISEL-NEXT: str x9, [sp, #232] ; 8-byte Folded Spill +; GISEL-NEXT: ldr x9, [sp, #48] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, xzr, x9, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x7, x9 +; GISEL-NEXT: ldr x7, [sp, #312] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x28, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: mov x28, x21 +; GISEL-NEXT: orr x20, x30, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x16, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: mov x16, x0 +; GISEL-NEXT: orr x20, x25, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x13, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x20, x0, x20 +; GISEL-NEXT: mov x0, x19 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x2, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: mov x2, x4 +; GISEL-NEXT: orr x20, x24, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; 
GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x6, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: mov x6, x5 +; GISEL-NEXT: orr x20, x21, x20 +; GISEL-NEXT: mov x21, x25 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x12, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: ldr x12, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: orr x20, x19, x20 +; GISEL-NEXT: mov x19, x27 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x4, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: orr x20, x5, x20 +; GISEL-NEXT: ldp x30, x4, [sp, #320] ; 16-byte Folded Reload +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: mov x5, x3 +; GISEL-NEXT: csel x20, xzr, x26, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: orr x20, x14, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x15, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: orr x20, x17, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x1, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: orr x20, x4, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x30, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: orr x20, x7, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x27, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: orr x20, x23, x20 +; GISEL-NEXT: ldp x27, x23, [sp, #32] ; 16-byte Folded Reload +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x23, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: orr x20, x27, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: str x9, 
[sp, #208] ; 8-byte Folded Spill +; GISEL-NEXT: ldp x12, x9, [sp, #16] ; 16-byte Folded Reload +; GISEL-NEXT: csel x9, xzr, x9, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x12, x9 +; GISEL-NEXT: ldr x12, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x3, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: ldr x3, [sp, #296] ; 8-byte Folded Reload +; GISEL-NEXT: orr x20, x25, x20 +; GISEL-NEXT: ldr x25, [sp, #280] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x13, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: ldr x13, [sp, #8] ; 8-byte Folded Reload +; GISEL-NEXT: orr x20, x16, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x13, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x20, x24, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x3, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x20, x28, x20 +; GISEL-NEXT: mov x28, x16 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x25, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x20, x0, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x2, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x20, x6, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x26, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: mov x26, x14 +; GISEL-NEXT: orr x20, x14, x20 +; GISEL-NEXT: mov x14, x15 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x15, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: mov x15, x17 +; GISEL-NEXT: orr x20, x17, x20 +; GISEL-NEXT: mov x17, x1 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x1, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: 
mov x1, x4 +; GISEL-NEXT: orr x20, x4, x20 +; GISEL-NEXT: mov x4, x30 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x30, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: ldr x30, [sp, #272] ; 8-byte Folded Reload +; GISEL-NEXT: orr x20, x7, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x19, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: orr x20, x22, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x23, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: orr x20, x27, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x12, [sp, #168] ; 8-byte Folded Reload +; GISEL-NEXT: str x9, [sp, #184] ; 8-byte Folded Spill +; GISEL-NEXT: csel x9, xzr, x5, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x21, x9 +; GISEL-NEXT: ldr x5, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: mov x21, x0 +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x5, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x20, x16, x20 +; GISEL-NEXT: mov x16, x24 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x13, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x20, x24, x20 +; GISEL-NEXT: ldr x24, [sp, #288] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x3, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x20, x24, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x25, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x20, x0, x20 +; GISEL-NEXT: csel x9, x20, x9, 
eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x2, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x20, x6, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x30, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x20, x26, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x14, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: orr x20, x15, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x17, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: orr x20, x1, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x4, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: orr x20, x7, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x19, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: orr x20, x22, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x23, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: orr x20, x27, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x12, [sp, #144] ; 8-byte Folded Reload +; GISEL-NEXT: str x9, [sp, #168] ; 8-byte Folded Spill +; GISEL-NEXT: csel x9, xzr, x5, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x28, x9 +; GISEL-NEXT: mov x28, x3 +; GISEL-NEXT: mov x5, x7 +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x13, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x20, x16, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel 
x20, xzr, x3, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x20, x24, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x25, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x20, x0, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x2, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x20, x6, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x30, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x20, x26, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x14, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x20, x15, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x17, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: orr x20, x1, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x4, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: orr x20, x7, x20 +; GISEL-NEXT: mov x7, x19 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x19, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: mov x19, x22 +; GISEL-NEXT: orr x20, x22, x20 +; GISEL-NEXT: mov x22, x23 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x20, xzr, x23, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: orr x20, x27, x20 +; GISEL-NEXT: csel x9, x20, x9, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x12, [sp, #152] ; 8-byte Folded Reload +; GISEL-NEXT: str x9, [sp, #304] ; 
8-byte Folded Spill +; GISEL-NEXT: csel x9, xzr, x13, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x16, x9 +; GISEL-NEXT: mov x16, x0 +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x3, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x3, x24, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x25, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x3, x21, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x2, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x3, x6, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x30, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x3, x26, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x14, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x3, x15, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x17, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x3, x1, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x4, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: orr x3, x5, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x7, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: orr x3, x19, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x23, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: orr x3, x27, x3 +; GISEL-NEXT: csel x9, x3, x9, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: mov x3, x2 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x9, 
x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x20, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x12, [sp, #160] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, xzr, x28, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x24, x9 +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x25, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x0, x21, x0 +; GISEL-NEXT: mov x21, x6 +; GISEL-NEXT: csel x9, x0, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x2, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x0, x6, x0 +; GISEL-NEXT: csel x9, x0, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x30, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x0, x26, x0 +; GISEL-NEXT: csel x9, x0, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x14, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x0, x15, x0 +; GISEL-NEXT: csel x9, x0, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x17, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x0, x1, x0 +; GISEL-NEXT: csel x9, x0, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x4, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x0, x5, x0 +; GISEL-NEXT: csel x9, x0, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x7, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: orr x0, x19, x0 +; GISEL-NEXT: csel x9, x0, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x23, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: orr x0, x27, x0 +; GISEL-NEXT: csel x9, x0, x9, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; 
GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x2, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x12, [sp, #176] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, xzr, x25, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x16, x9 +; GISEL-NEXT: ldr x16, [sp, #216] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x3, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x13, x6, x13 +; GISEL-NEXT: csel x9, x13, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x30, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x13, x26, x13 +; GISEL-NEXT: csel x9, x13, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x14, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x13, x15, x13 +; GISEL-NEXT: csel x9, x13, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x17, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x13, x1, x13 +; GISEL-NEXT: csel x9, x13, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x4, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x13, x5, x13 +; GISEL-NEXT: csel x9, x13, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x7, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x13, x19, x13 +; GISEL-NEXT: csel x9, x13, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x23, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: orr x13, x27, x13 +; GISEL-NEXT: csel x9, x13, x9, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel 
x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x6, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x9, xzr, x3, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x9, x21, x9 +; GISEL-NEXT: csel x9, x9, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x30, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x12, x26, x12 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x14, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x12, x15, x12 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x17, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x12, x1, x12 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x4, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x12, x5, x12 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x7, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x12, x19, x12 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x23, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: orr x12, x27, x12 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: ldr x12, [sp, #192] ; 8-byte Folded Reload +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x9, x12, x9, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x30, eq +; GISEL-NEXT: cmp x10, #0 
+; GISEL-NEXT: orr x12, x26, x12 +; GISEL-NEXT: ldp x29, x30, [sp, #416] ; 16-byte Folded Reload +; GISEL-NEXT: csel x12, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x14, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x13, x15, x13 +; GISEL-NEXT: ldp x26, x25, [sp, #352] ; 16-byte Folded Reload +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x17, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x13, x1, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x4, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x13, x5, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x7, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: orr x13, x19, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x13, xzr, x23, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: orr x13, x27, x13 +; GISEL-NEXT: csel x12, x13, x12, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: ldr x13, [sp, #200] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x13, x13, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x14, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: ldr x14, [sp, #264] ; 8-byte Folded Reload +; GISEL-NEXT: orr x12, x15, x12 +; GISEL-NEXT: 
ldr x15, [sp, #136] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: stp x9, x13, [x14, #72] +; GISEL-NEXT: csel x0, xzr, x17, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: str x15, [x14] +; GISEL-NEXT: orr x0, x1, x0 +; GISEL-NEXT: ldr x15, [sp, #232] ; 8-byte Folded Reload +; GISEL-NEXT: stp x2, x6, [x14, #56] +; GISEL-NEXT: csel x12, x0, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x4, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: str x15, [x14, #8] +; GISEL-NEXT: orr x0, x5, x0 +; GISEL-NEXT: ldr x15, [sp, #208] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, x0, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x7, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: str x15, [x14, #16] +; GISEL-NEXT: orr x0, x19, x0 +; GISEL-NEXT: ldr x15, [sp, #184] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, x0, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x0, xzr, x23, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: str x15, [x14, #24] +; GISEL-NEXT: orr x0, x27, x0 +; GISEL-NEXT: ldr x15, [sp, #168] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, x0, x12, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: str x15, [x14, #32] +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: ldr x15, [sp, #304] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: stp x15, x20, [x14, #40] +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel 
x12, x11, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x0, x16, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x16, [sp, #224] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, xzr, x17, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x12, x1, x12 +; GISEL-NEXT: csel x12, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x4, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x3, x5, x3 +; GISEL-NEXT: csel x12, x3, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x7, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x3, x19, x3 +; GISEL-NEXT: csel x12, x3, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x3, xzr, x23, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: orr x3, x27, x3 +; GISEL-NEXT: ldp x24, x23, [sp, #368] ; 16-byte Folded Reload +; GISEL-NEXT: csel x12, x3, x12, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x3, x16, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x16, [sp, #240] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, xzr, x4, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: stp x0, x3, [x14, #88] +; GISEL-NEXT: orr x12, x5, x12 +; GISEL-NEXT: csel x12, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: 
csel x4, xzr, x7, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x4, x19, x4 +; GISEL-NEXT: csel x12, x4, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x4, xzr, x22, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: orr x4, x27, x4 +; GISEL-NEXT: csel x12, x4, x12, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x4, x16, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: ldr x16, [sp, #248] ; 8-byte Folded Reload +; GISEL-NEXT: csel x12, xzr, x7, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: orr x12, x19, x12 +; GISEL-NEXT: ldp x20, x19, [sp, #400] ; 16-byte Folded Reload +; GISEL-NEXT: csel x12, x12, x11, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x17, xzr, x22, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: orr x17, x27, x17 +; GISEL-NEXT: csel x12, x17, x12, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #7 +; 
GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x17, x16, x12, eq +; GISEL-NEXT: tst x8, #0x3f +; GISEL-NEXT: csel x12, xzr, x22, eq +; GISEL-NEXT: cmp x10, #0 +; GISEL-NEXT: stp x4, x17, [x14, #104] +; GISEL-NEXT: orr x12, x27, x12 +; GISEL-NEXT: ldp x22, x21, [sp, #384] ; 16-byte Folded Reload +; GISEL-NEXT: csel x12, x12, x11, eq +; GISEL-NEXT: cmp x10, #1 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #2 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #3 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #4 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #5 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #6 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #7 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #8 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #9 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #10 +; GISEL-NEXT: csel x12, x11, x12, eq +; GISEL-NEXT: cmp x10, #11 +; GISEL-NEXT: csel x9, x11, x12, eq +; GISEL-NEXT: cmp x10, #12 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #13 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #14 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x10, #15 +; GISEL-NEXT: csel x9, x11, x9, eq +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: ldr x8, [sp, #256] ; 8-byte Folded Reload +; GISEL-NEXT: ldp x28, x27, [sp, #336] ; 16-byte 
Folded Reload +; GISEL-NEXT: csel x8, x8, x9, eq +; GISEL-NEXT: str x8, [x14, #120] +; GISEL-NEXT: add sp, sp, #432 +; GISEL-NEXT: ret +entry: + %input_val = load i1024, ptr %input, align 128 + %shift_ext = zext i32 %shift to i1024 + %shifted = ashr i1024 %input_val, %shift_ext + store i1024 %shifted, ptr %result, align 128 + ret void +} + + +; Constant shift tests. + +; Zero shift tests +define void @test_shl_i512_const_zero(ptr %result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_zero: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr q0, [x1] +; SDAG-NEXT: ldp x11, x10, [x1, #16] +; SDAG-NEXT: ldp x13, x12, [x1, #32] +; SDAG-NEXT: str q0, [x0] +; SDAG-NEXT: stp x9, x8, [x0, #48] +; SDAG-NEXT: stp x11, x10, [x0, #16] +; SDAG-NEXT: stp x13, x12, [x0, #32] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_zero: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldp x10, x11, [x1, #16] +; GISEL-NEXT: ldp x12, x13, [x1, #32] +; GISEL-NEXT: ldp x14, x15, [x1, #48] +; GISEL-NEXT: stp x8, x9, [x0] +; GISEL-NEXT: stp x10, x11, [x0, #16] +; GISEL-NEXT: stp x12, x13, [x0, #32] +; GISEL-NEXT: stp x14, x15, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 0 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_zero(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_zero: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr q0, [x1] +; SDAG-NEXT: ldp x11, x10, [x1, #16] +; SDAG-NEXT: ldp x13, x12, [x1, #32] +; SDAG-NEXT: str q0, [x0] +; SDAG-NEXT: stp x9, x8, [x0, #48] +; SDAG-NEXT: stp x11, x10, [x0, #16] +; SDAG-NEXT: stp x13, x12, [x0, #32] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_zero: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldp x10, x11, [x1, #16] +; GISEL-NEXT: ldp x12, x13, [x1, #32] +; GISEL-NEXT: ldp 
x14, x15, [x1, #48] +; GISEL-NEXT: stp x8, x9, [x0] +; GISEL-NEXT: stp x10, x11, [x0, #16] +; GISEL-NEXT: stp x12, x13, [x0, #32] +; GISEL-NEXT: stp x14, x15, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 0 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_zero(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_zero: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr q0, [x1] +; SDAG-NEXT: ldp x11, x10, [x1, #16] +; SDAG-NEXT: ldp x13, x12, [x1, #32] +; SDAG-NEXT: str q0, [x0] +; SDAG-NEXT: stp x9, x8, [x0, #48] +; SDAG-NEXT: stp x11, x10, [x0, #16] +; SDAG-NEXT: stp x13, x12, [x0, #32] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_zero: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldp x10, x11, [x1, #16] +; GISEL-NEXT: ldp x12, x13, [x1, #32] +; GISEL-NEXT: ldp x14, x15, [x1, #48] +; GISEL-NEXT: stp x8, x9, [x0] +; GISEL-NEXT: stp x10, x11, [x0, #16] +; GISEL-NEXT: stp x12, x13, [x0, #32] +; GISEL-NEXT: stp x14, x15, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 0 + store i512 %shifted, ptr %result, align 64 + ret void +} + +; Word-aligned constant shifts (32-bit multiples for i512 -> i32 narrowing) +define void @test_shl_i512_const_32(ptr %result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_32: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x12, x13, [x1, #16] +; SDAG-NEXT: ldur x8, [x1, #36] +; SDAG-NEXT: ldp x14, x15, [x1] +; SDAG-NEXT: ldur x9, [x1, #28] +; SDAG-NEXT: ldur x10, [x1, #44] +; SDAG-NEXT: ldur x11, [x1, #52] +; SDAG-NEXT: stp x9, x8, [x0, #32] +; SDAG-NEXT: extr x9, x13, x12, #32 +; SDAG-NEXT: stp x10, x11, [x0, #48] +; SDAG-NEXT: extr x10, x12, x15, #32 +; SDAG-NEXT: lsl x8, x14, #32 +; SDAG-NEXT: stp x10, x9, [x0, #16] +; SDAG-NEXT: extr x10, x15, x14, #32 +; SDAG-NEXT: 
stp x8, x10, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_32: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldp x11, x12, [x1, #16] +; GISEL-NEXT: ldp x14, x15, [x1, #32] +; GISEL-NEXT: lsr x10, x8, #32 +; GISEL-NEXT: lsr x13, x9, #32 +; GISEL-NEXT: lsl x8, x8, #32 +; GISEL-NEXT: orr x9, x10, x9, lsl #32 +; GISEL-NEXT: lsr x10, x11, #32 +; GISEL-NEXT: orr x11, x13, x11, lsl #32 +; GISEL-NEXT: ldp x13, x16, [x1, #48] +; GISEL-NEXT: stp x8, x9, [x0] +; GISEL-NEXT: lsr x8, x12, #32 +; GISEL-NEXT: orr x10, x10, x12, lsl #32 +; GISEL-NEXT: lsr x12, x14, #32 +; GISEL-NEXT: lsr x9, x15, #32 +; GISEL-NEXT: orr x8, x8, x14, lsl #32 +; GISEL-NEXT: stp x11, x10, [x0, #16] +; GISEL-NEXT: orr x11, x12, x15, lsl #32 +; GISEL-NEXT: lsr x12, x13, #32 +; GISEL-NEXT: orr x9, x9, x13, lsl #32 +; GISEL-NEXT: stp x8, x11, [x0, #32] +; GISEL-NEXT: orr x8, x12, x16, lsl #32 +; GISEL-NEXT: stp x9, x8, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 32 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_32(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_32: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x11, x10, [x1, #48] +; SDAG-NEXT: ldur x8, [x1, #12] +; SDAG-NEXT: ldp x15, x14, [x1, #32] +; SDAG-NEXT: ldur x9, [x1, #4] +; SDAG-NEXT: ldp x12, x13, [x1, #16] +; SDAG-NEXT: extr x16, x10, x11, #32 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: lsr x9, x10, #32 +; SDAG-NEXT: extr x8, x14, x15, #32 +; SDAG-NEXT: extr x10, x11, x14, #32 +; SDAG-NEXT: stp x16, x9, [x0, #48] +; SDAG-NEXT: extr x9, x13, x12, #32 +; SDAG-NEXT: stp x8, x10, [x0, #32] +; SDAG-NEXT: extr x8, x15, x13, #32 +; SDAG-NEXT: stp x9, x8, [x0, #16] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_32: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x11, [x1] +; GISEL-NEXT: ldp x10, x14, [x1, #24] +; GISEL-NEXT: 
ldr x16, [x1, #56] +; GISEL-NEXT: lsl x12, x8, #32 +; GISEL-NEXT: lsl x13, x9, #32 +; GISEL-NEXT: lsl x15, x10, #32 +; GISEL-NEXT: orr x11, x12, x11, lsr #32 +; GISEL-NEXT: orr x8, x13, x8, lsr #32 +; GISEL-NEXT: lsl x13, x14, #32 +; GISEL-NEXT: orr x9, x15, x9, lsr #32 +; GISEL-NEXT: ldp x12, x15, [x1, #40] +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: orr x10, x13, x10, lsr #32 +; GISEL-NEXT: lsl x8, x16, #32 +; GISEL-NEXT: lsl x11, x12, #32 +; GISEL-NEXT: lsl x13, x15, #32 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x8, x8, x15, lsr #32 +; GISEL-NEXT: lsr x10, x16, #32 +; GISEL-NEXT: orr x11, x11, x14, lsr #32 +; GISEL-NEXT: orr x9, x13, x12, lsr #32 +; GISEL-NEXT: stp x8, x10, [x0, #48] +; GISEL-NEXT: stp x11, x9, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 32 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_32(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_32: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x11, x10, [x1, #48] +; SDAG-NEXT: ldur x8, [x1, #12] +; SDAG-NEXT: ldp x15, x14, [x1, #32] +; SDAG-NEXT: ldur x9, [x1, #4] +; SDAG-NEXT: ldp x12, x13, [x1, #16] +; SDAG-NEXT: extr x16, x10, x11, #32 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: asr x9, x10, #32 +; SDAG-NEXT: extr x8, x14, x15, #32 +; SDAG-NEXT: extr x10, x11, x14, #32 +; SDAG-NEXT: stp x16, x9, [x0, #48] +; SDAG-NEXT: extr x9, x13, x12, #32 +; SDAG-NEXT: stp x8, x10, [x0, #32] +; SDAG-NEXT: extr x8, x15, x13, #32 +; SDAG-NEXT: stp x9, x8, [x0, #16] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_32: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x11, [x1] +; GISEL-NEXT: ldp x10, x13, [x1, #24] +; GISEL-NEXT: ldr x17, [x1, #56] +; GISEL-NEXT: lsl x12, x8, #32 +; GISEL-NEXT: lsl x15, x9, #32 +; GISEL-NEXT: lsl x16, x10, #32 +; GISEL-NEXT: orr x11, x12, x11, lsr #32 +; GISEL-NEXT: ldp x14, x12, [x1, 
#40] +; GISEL-NEXT: orr x8, x15, x8, lsr #32 +; GISEL-NEXT: lsl x15, x13, #32 +; GISEL-NEXT: orr x9, x16, x9, lsr #32 +; GISEL-NEXT: asr x16, x17, #63 +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: lsl x11, x14, #32 +; GISEL-NEXT: orr x10, x15, x10, lsr #32 +; GISEL-NEXT: lsl x15, x12, #32 +; GISEL-NEXT: orr x8, x11, x13, lsr #32 +; GISEL-NEXT: lsl x11, x17, #32 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x9, x15, x14, lsr #32 +; GISEL-NEXT: lsl x13, x16, #32 +; GISEL-NEXT: orr x10, x11, x12, lsr #32 +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: orr x8, x13, x17, asr #32 +; GISEL-NEXT: stp x10, x8, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 32 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_shl_i512_const_64(ptr %result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_64: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x10, x8, [x1, #40] +; SDAG-NEXT: ldr q0, [x1] +; SDAG-NEXT: ldp x12, x9, [x1, #24] +; SDAG-NEXT: ldr x11, [x1, #16] +; SDAG-NEXT: str xzr, [x0] +; SDAG-NEXT: stp x10, x8, [x0, #48] +; SDAG-NEXT: stp x12, x9, [x0, #32] +; SDAG-NEXT: str x11, [x0, #24] +; SDAG-NEXT: stur q0, [x0, #8] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_64: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldr x14, [x1, #48] +; GISEL-NEXT: ldp x10, x11, [x1, #16] +; GISEL-NEXT: ldp x12, x13, [x1, #32] +; GISEL-NEXT: stp xzr, x8, [x0] +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: stp x11, x12, [x0, #32] +; GISEL-NEXT: stp x13, x14, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 64 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_64(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_64: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x11, x8, [x1, #48] +; SDAG-NEXT: ldur q0, [x1, #8] +; 
SDAG-NEXT: ldp x10, x9, [x1, #24] +; SDAG-NEXT: ldr x12, [x1, #40] +; SDAG-NEXT: str q0, [x0] +; SDAG-NEXT: stp x8, xzr, [x0, #48] +; SDAG-NEXT: stp x12, x11, [x0, #32] +; SDAG-NEXT: stp x10, x9, [x0, #16] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_64: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x14, [x1, #56] +; GISEL-NEXT: ldp x10, x11, [x1, #24] +; GISEL-NEXT: ldp x12, x13, [x1, #40] +; GISEL-NEXT: stp x14, xzr, [x0, #48] +; GISEL-NEXT: stp x8, x9, [x0] +; GISEL-NEXT: stp x10, x11, [x0, #16] +; GISEL-NEXT: stp x12, x13, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 64 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_64(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_64: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x8, x9, [x1, #40] +; SDAG-NEXT: ldr x12, [x1, #56] +; SDAG-NEXT: ldp x11, x10, [x1, #24] +; SDAG-NEXT: ldur q0, [x1, #8] +; SDAG-NEXT: stp x8, x9, [x0, #32] +; SDAG-NEXT: asr x8, x12, #63 +; SDAG-NEXT: stp x11, x10, [x0, #16] +; SDAG-NEXT: str q0, [x0] +; SDAG-NEXT: stp x12, x8, [x0, #48] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_64: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x14, [x1, #56] +; GISEL-NEXT: ldp x10, x11, [x1, #24] +; GISEL-NEXT: ldp x12, x13, [x1, #40] +; GISEL-NEXT: stp x8, x9, [x0] +; GISEL-NEXT: asr x8, x14, #63 +; GISEL-NEXT: stp x10, x11, [x0, #16] +; GISEL-NEXT: stp x12, x13, [x0, #32] +; GISEL-NEXT: stp x14, x8, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 64 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_shl_i512_const_96(ptr %result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_96: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x12, x11, [x1, #16] +; SDAG-NEXT: ldur x9, [x1, #36] 
+; SDAG-NEXT: ldur x10, [x1, #44] +; SDAG-NEXT: ldur x8, [x1, #28] +; SDAG-NEXT: ldp x13, x14, [x1] +; SDAG-NEXT: stp x9, x10, [x0, #48] +; SDAG-NEXT: extr x9, x11, x12, #32 +; SDAG-NEXT: extr x10, x14, x13, #32 +; SDAG-NEXT: stp x9, x8, [x0, #32] +; SDAG-NEXT: extr x8, x12, x14, #32 +; SDAG-NEXT: lsl x9, x13, #32 +; SDAG-NEXT: stp x10, x8, [x0, #16] +; SDAG-NEXT: stp xzr, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_96: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldr x15, [x1, #48] +; GISEL-NEXT: ldp x10, x11, [x1, #16] +; GISEL-NEXT: ldp x12, x13, [x1, #32] +; GISEL-NEXT: lsr x14, x8, #32 +; GISEL-NEXT: lsr x16, x9, #32 +; GISEL-NEXT: lsl x8, x8, #32 +; GISEL-NEXT: orr x9, x14, x9, lsl #32 +; GISEL-NEXT: lsr x14, x10, #32 +; GISEL-NEXT: orr x10, x16, x10, lsl #32 +; GISEL-NEXT: stp xzr, x8, [x0] +; GISEL-NEXT: lsr x8, x11, #32 +; GISEL-NEXT: orr x11, x14, x11, lsl #32 +; GISEL-NEXT: lsr x14, x12, #32 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: lsr x9, x13, #32 +; GISEL-NEXT: orr x8, x8, x12, lsl #32 +; GISEL-NEXT: orr x10, x14, x13, lsl #32 +; GISEL-NEXT: orr x9, x9, x15, lsl #32 +; GISEL-NEXT: stp x11, x8, [x0, #32] +; GISEL-NEXT: stp x10, x9, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 96 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_96(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_96: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x10, [x1, #48] +; SDAG-NEXT: ldur x8, [x1, #20] +; SDAG-NEXT: ldp x13, x14, [x1, #32] +; SDAG-NEXT: ldur x11, [x1, #12] +; SDAG-NEXT: ldur x12, [x1, #28] +; SDAG-NEXT: lsr x15, x10, #32 +; SDAG-NEXT: stp x11, x8, [x0] +; SDAG-NEXT: extr x8, x10, x9, #32 +; SDAG-NEXT: extr x11, x9, x14, #32 +; SDAG-NEXT: extr x9, x14, x13, #32 +; SDAG-NEXT: stp x15, xzr, [x0, #48] +; SDAG-NEXT: stp x11, x8, [x0, #32] +; SDAG-NEXT: stp x12, x9, [x0, 
#16] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_96: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #16] +; GISEL-NEXT: ldr x10, [x1, #8] +; GISEL-NEXT: ldp x11, x14, [x1, #32] +; GISEL-NEXT: ldp x15, x16, [x1, #48] +; GISEL-NEXT: lsl x12, x8, #32 +; GISEL-NEXT: lsl x13, x9, #32 +; GISEL-NEXT: orr x10, x12, x10, lsr #32 +; GISEL-NEXT: lsl x12, x11, #32 +; GISEL-NEXT: orr x8, x13, x8, lsr #32 +; GISEL-NEXT: lsl x13, x14, #32 +; GISEL-NEXT: orr x9, x12, x9, lsr #32 +; GISEL-NEXT: stp x10, x8, [x0] +; GISEL-NEXT: lsl x10, x15, #32 +; GISEL-NEXT: orr x11, x13, x11, lsr #32 +; GISEL-NEXT: lsl x12, x16, #32 +; GISEL-NEXT: orr x8, x10, x14, lsr #32 +; GISEL-NEXT: lsr x10, x16, #32 +; GISEL-NEXT: stp x9, x11, [x0, #16] +; GISEL-NEXT: orr x9, x12, x15, lsr #32 +; GISEL-NEXT: stp x10, xzr, [x0, #48] +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 96 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_96(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_96: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x10, [x1, #48] +; SDAG-NEXT: ldur x8, [x1, #12] +; SDAG-NEXT: ldur x11, [x1, #20] +; SDAG-NEXT: ldur x12, [x1, #28] +; SDAG-NEXT: ldp x13, x14, [x1, #32] +; SDAG-NEXT: asr x15, x10, #32 +; SDAG-NEXT: stp x8, x11, [x0] +; SDAG-NEXT: asr x8, x10, #63 +; SDAG-NEXT: extr x11, x9, x14, #32 +; SDAG-NEXT: stp x15, x8, [x0, #48] +; SDAG-NEXT: extr x9, x10, x9, #32 +; SDAG-NEXT: extr x8, x14, x13, #32 +; SDAG-NEXT: stp x11, x9, [x0, #32] +; SDAG-NEXT: stp x12, x8, [x0, #16] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_96: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #16] +; GISEL-NEXT: ldr x11, [x1, #8] +; GISEL-NEXT: ldp x10, x13, [x1, #32] +; GISEL-NEXT: lsl x12, x8, #32 +; GISEL-NEXT: lsl x14, x9, #32 +; GISEL-NEXT: lsl x15, x10, #32 +; GISEL-NEXT: orr x11, x12, x11, lsr #32 
+; GISEL-NEXT: ldp x12, x16, [x1, #48] +; GISEL-NEXT: orr x8, x14, x8, lsr #32 +; GISEL-NEXT: lsl x14, x13, #32 +; GISEL-NEXT: orr x9, x15, x9, lsr #32 +; GISEL-NEXT: asr x15, x16, #63 +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: lsl x11, x12, #32 +; GISEL-NEXT: orr x10, x14, x10, lsr #32 +; GISEL-NEXT: lsl x14, x16, #32 +; GISEL-NEXT: orr x8, x11, x13, lsr #32 +; GISEL-NEXT: lsl x11, x15, #32 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x9, x14, x12, lsr #32 +; GISEL-NEXT: orr x10, x11, x16, asr #32 +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: stp x10, x15, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 96 + store i512 %shifted, ptr %result, align 64 + ret void +} + +; Bit-only shifts (< 64 bits) +define void @test_shl_i512_const_1(ptr %result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_1: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x8, x9, [x1, #40] +; SDAG-NEXT: ldr x10, [x1, #56] +; SDAG-NEXT: ldp x13, x11, [x1, #24] +; SDAG-NEXT: ldp x15, x14, [x1, #8] +; SDAG-NEXT: extr x12, x9, x8, #63 +; SDAG-NEXT: extr x9, x10, x9, #63 +; SDAG-NEXT: ldr x10, [x1] +; SDAG-NEXT: extr x16, x11, x13, #63 +; SDAG-NEXT: extr x8, x8, x11, #63 +; SDAG-NEXT: stp x12, x9, [x0, #48] +; SDAG-NEXT: extr x9, x14, x15, #63 +; SDAG-NEXT: extr x11, x13, x14, #63 +; SDAG-NEXT: stp x16, x8, [x0, #32] +; SDAG-NEXT: lsl x8, x10, #1 +; SDAG-NEXT: stp x9, x11, [x0, #16] +; SDAG-NEXT: extr x9, x15, x10, #63 +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_1: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldp x11, x12, [x1, #16] +; GISEL-NEXT: ldp x14, x15, [x1, #32] +; GISEL-NEXT: lsr x10, x8, #63 +; GISEL-NEXT: lsr x13, x9, #63 +; GISEL-NEXT: lsl x8, x8, #1 +; GISEL-NEXT: orr x9, x10, x9, lsl #1 +; GISEL-NEXT: lsr x10, x11, #63 +; GISEL-NEXT: orr x11, x13, x11, lsl #1 +; GISEL-NEXT: ldp x13, x16, [x1, #48] +; GISEL-NEXT: stp x8, 
x9, [x0] +; GISEL-NEXT: lsr x8, x12, #63 +; GISEL-NEXT: orr x10, x10, x12, lsl #1 +; GISEL-NEXT: lsr x12, x14, #63 +; GISEL-NEXT: lsr x9, x15, #63 +; GISEL-NEXT: orr x8, x8, x14, lsl #1 +; GISEL-NEXT: stp x11, x10, [x0, #16] +; GISEL-NEXT: orr x11, x12, x15, lsl #1 +; GISEL-NEXT: lsr x12, x13, #63 +; GISEL-NEXT: orr x9, x9, x13, lsl #1 +; GISEL-NEXT: stp x8, x11, [x0, #32] +; GISEL-NEXT: orr x8, x12, x16, lsl #1 +; GISEL-NEXT: stp x9, x8, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 1 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_1(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_1: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldp x11, x10, [x1, #32] +; SDAG-NEXT: ldp x16, x15, [x1, #16] +; SDAG-NEXT: extr x12, x8, x9, #1 +; SDAG-NEXT: lsr x8, x8, #1 +; SDAG-NEXT: ldp x13, x14, [x1] +; SDAG-NEXT: extr x9, x9, x10, #1 +; SDAG-NEXT: stp x12, x8, [x0, #48] +; SDAG-NEXT: extr x12, x10, x11, #1 +; SDAG-NEXT: extr x8, x15, x16, #1 +; SDAG-NEXT: extr x10, x11, x15, #1 +; SDAG-NEXT: stp x12, x9, [x0, #32] +; SDAG-NEXT: extr x9, x14, x13, #1 +; SDAG-NEXT: stp x8, x10, [x0, #16] +; SDAG-NEXT: extr x8, x16, x14, #1 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_1: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x11, [x1] +; GISEL-NEXT: ldp x10, x14, [x1, #24] +; GISEL-NEXT: ldr x16, [x1, #56] +; GISEL-NEXT: lsl x12, x8, #63 +; GISEL-NEXT: lsl x13, x9, #63 +; GISEL-NEXT: lsl x15, x10, #63 +; GISEL-NEXT: orr x11, x12, x11, lsr #1 +; GISEL-NEXT: orr x8, x13, x8, lsr #1 +; GISEL-NEXT: lsl x13, x14, #63 +; GISEL-NEXT: orr x9, x15, x9, lsr #1 +; GISEL-NEXT: ldp x12, x15, [x1, #40] +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: orr x10, x13, x10, lsr #1 +; GISEL-NEXT: lsl x8, x16, #63 +; GISEL-NEXT: lsl x11, x12, #63 +; GISEL-NEXT: lsl x13, 
x15, #63 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x8, x8, x15, lsr #1 +; GISEL-NEXT: lsr x10, x16, #1 +; GISEL-NEXT: orr x11, x11, x14, lsr #1 +; GISEL-NEXT: orr x9, x13, x12, lsr #1 +; GISEL-NEXT: stp x8, x10, [x0, #48] +; GISEL-NEXT: stp x11, x9, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 1 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_1(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_1: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldp x11, x10, [x1, #32] +; SDAG-NEXT: ldp x16, x15, [x1, #16] +; SDAG-NEXT: extr x12, x8, x9, #1 +; SDAG-NEXT: asr x8, x8, #1 +; SDAG-NEXT: ldp x13, x14, [x1] +; SDAG-NEXT: extr x9, x9, x10, #1 +; SDAG-NEXT: stp x12, x8, [x0, #48] +; SDAG-NEXT: extr x12, x10, x11, #1 +; SDAG-NEXT: extr x8, x15, x16, #1 +; SDAG-NEXT: extr x10, x11, x15, #1 +; SDAG-NEXT: stp x12, x9, [x0, #32] +; SDAG-NEXT: extr x9, x14, x13, #1 +; SDAG-NEXT: stp x8, x10, [x0, #16] +; SDAG-NEXT: extr x8, x16, x14, #1 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_1: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x11, [x1] +; GISEL-NEXT: ldp x10, x13, [x1, #24] +; GISEL-NEXT: ldr x17, [x1, #56] +; GISEL-NEXT: lsl x12, x8, #63 +; GISEL-NEXT: lsl x15, x9, #63 +; GISEL-NEXT: lsl x16, x10, #63 +; GISEL-NEXT: orr x11, x12, x11, lsr #1 +; GISEL-NEXT: ldp x14, x12, [x1, #40] +; GISEL-NEXT: orr x8, x15, x8, lsr #1 +; GISEL-NEXT: lsl x15, x13, #63 +; GISEL-NEXT: orr x9, x16, x9, lsr #1 +; GISEL-NEXT: asr x16, x17, #63 +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: lsl x11, x14, #63 +; GISEL-NEXT: orr x10, x15, x10, lsr #1 +; GISEL-NEXT: lsl x15, x12, #63 +; GISEL-NEXT: orr x8, x11, x13, lsr #1 +; GISEL-NEXT: lsl x11, x17, #63 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x9, x15, x14, lsr #1 +; GISEL-NEXT: 
lsl x13, x16, #63 +; GISEL-NEXT: orr x10, x11, x12, lsr #1 +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: orr x8, x13, x17, asr #1 +; GISEL-NEXT: stp x10, x8, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 1 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_shl_i512_const_15(ptr %result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_15: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x8, x9, [x1, #40] +; SDAG-NEXT: ldr x10, [x1, #56] +; SDAG-NEXT: ldp x13, x11, [x1, #24] +; SDAG-NEXT: ldp x15, x14, [x1, #8] +; SDAG-NEXT: extr x12, x9, x8, #49 +; SDAG-NEXT: extr x9, x10, x9, #49 +; SDAG-NEXT: ldr x10, [x1] +; SDAG-NEXT: extr x16, x11, x13, #49 +; SDAG-NEXT: extr x8, x8, x11, #49 +; SDAG-NEXT: stp x12, x9, [x0, #48] +; SDAG-NEXT: extr x9, x14, x15, #49 +; SDAG-NEXT: extr x11, x13, x14, #49 +; SDAG-NEXT: stp x16, x8, [x0, #32] +; SDAG-NEXT: lsl x8, x10, #15 +; SDAG-NEXT: stp x9, x11, [x0, #16] +; SDAG-NEXT: extr x9, x15, x10, #49 +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_15: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldp x11, x12, [x1, #16] +; GISEL-NEXT: ldp x14, x15, [x1, #32] +; GISEL-NEXT: lsr x10, x8, #49 +; GISEL-NEXT: lsr x13, x9, #49 +; GISEL-NEXT: lsl x8, x8, #15 +; GISEL-NEXT: orr x9, x10, x9, lsl #15 +; GISEL-NEXT: lsr x10, x11, #49 +; GISEL-NEXT: orr x11, x13, x11, lsl #15 +; GISEL-NEXT: ldp x13, x16, [x1, #48] +; GISEL-NEXT: stp x8, x9, [x0] +; GISEL-NEXT: lsr x8, x12, #49 +; GISEL-NEXT: orr x10, x10, x12, lsl #15 +; GISEL-NEXT: lsr x12, x14, #49 +; GISEL-NEXT: lsr x9, x15, #49 +; GISEL-NEXT: orr x8, x8, x14, lsl #15 +; GISEL-NEXT: stp x11, x10, [x0, #16] +; GISEL-NEXT: orr x11, x12, x15, lsl #15 +; GISEL-NEXT: lsr x12, x13, #49 +; GISEL-NEXT: orr x9, x9, x13, lsl #15 +; GISEL-NEXT: stp x8, x11, [x0, #32] +; GISEL-NEXT: orr x8, x12, x16, lsl #15 +; GISEL-NEXT: stp x9, x8, 
[x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 15 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_15(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_15: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldp x11, x10, [x1, #32] +; SDAG-NEXT: ldp x16, x15, [x1, #16] +; SDAG-NEXT: extr x12, x8, x9, #15 +; SDAG-NEXT: lsr x8, x8, #15 +; SDAG-NEXT: ldp x13, x14, [x1] +; SDAG-NEXT: extr x9, x9, x10, #15 +; SDAG-NEXT: stp x12, x8, [x0, #48] +; SDAG-NEXT: extr x12, x10, x11, #15 +; SDAG-NEXT: extr x8, x15, x16, #15 +; SDAG-NEXT: extr x10, x11, x15, #15 +; SDAG-NEXT: stp x12, x9, [x0, #32] +; SDAG-NEXT: extr x9, x14, x13, #15 +; SDAG-NEXT: stp x8, x10, [x0, #16] +; SDAG-NEXT: extr x8, x16, x14, #15 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_15: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x11, [x1] +; GISEL-NEXT: ldp x10, x14, [x1, #24] +; GISEL-NEXT: ldr x16, [x1, #56] +; GISEL-NEXT: lsl x12, x8, #49 +; GISEL-NEXT: lsl x13, x9, #49 +; GISEL-NEXT: lsl x15, x10, #49 +; GISEL-NEXT: orr x11, x12, x11, lsr #15 +; GISEL-NEXT: orr x8, x13, x8, lsr #15 +; GISEL-NEXT: lsl x13, x14, #49 +; GISEL-NEXT: orr x9, x15, x9, lsr #15 +; GISEL-NEXT: ldp x12, x15, [x1, #40] +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: orr x10, x13, x10, lsr #15 +; GISEL-NEXT: lsl x8, x16, #49 +; GISEL-NEXT: lsl x11, x12, #49 +; GISEL-NEXT: lsl x13, x15, #49 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x8, x8, x15, lsr #15 +; GISEL-NEXT: lsr x10, x16, #15 +; GISEL-NEXT: orr x11, x11, x14, lsr #15 +; GISEL-NEXT: orr x9, x13, x12, lsr #15 +; GISEL-NEXT: stp x8, x10, [x0, #48] +; GISEL-NEXT: stp x11, x9, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 15 + store i512 %shifted, ptr %result, 
align 64 + ret void +} + +define void @test_ashr_i512_const_15(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_15: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldp x11, x10, [x1, #32] +; SDAG-NEXT: ldp x16, x15, [x1, #16] +; SDAG-NEXT: extr x12, x8, x9, #15 +; SDAG-NEXT: asr x8, x8, #15 +; SDAG-NEXT: ldp x13, x14, [x1] +; SDAG-NEXT: extr x9, x9, x10, #15 +; SDAG-NEXT: stp x12, x8, [x0, #48] +; SDAG-NEXT: extr x12, x10, x11, #15 +; SDAG-NEXT: extr x8, x15, x16, #15 +; SDAG-NEXT: extr x10, x11, x15, #15 +; SDAG-NEXT: stp x12, x9, [x0, #32] +; SDAG-NEXT: extr x9, x14, x13, #15 +; SDAG-NEXT: stp x8, x10, [x0, #16] +; SDAG-NEXT: extr x8, x16, x14, #15 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_15: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x11, [x1] +; GISEL-NEXT: ldp x10, x13, [x1, #24] +; GISEL-NEXT: ldr x17, [x1, #56] +; GISEL-NEXT: lsl x12, x8, #49 +; GISEL-NEXT: lsl x15, x9, #49 +; GISEL-NEXT: lsl x16, x10, #49 +; GISEL-NEXT: orr x11, x12, x11, lsr #15 +; GISEL-NEXT: ldp x14, x12, [x1, #40] +; GISEL-NEXT: orr x8, x15, x8, lsr #15 +; GISEL-NEXT: lsl x15, x13, #49 +; GISEL-NEXT: orr x9, x16, x9, lsr #15 +; GISEL-NEXT: asr x16, x17, #63 +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: lsl x11, x14, #49 +; GISEL-NEXT: orr x10, x15, x10, lsr #15 +; GISEL-NEXT: lsl x15, x12, #49 +; GISEL-NEXT: orr x8, x11, x13, lsr #15 +; GISEL-NEXT: lsl x11, x17, #49 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x9, x15, x14, lsr #15 +; GISEL-NEXT: lsl x13, x16, #49 +; GISEL-NEXT: orr x10, x11, x12, lsr #15 +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: orr x8, x13, x17, asr #15 +; GISEL-NEXT: stp x10, x8, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 15 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_shl_i512_const_63(ptr %result, ptr 
%input) { +; SDAG-LABEL: test_shl_i512_const_63: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x8, x9, [x1, #40] +; SDAG-NEXT: ldr x10, [x1, #56] +; SDAG-NEXT: ldp x13, x11, [x1, #24] +; SDAG-NEXT: ldp x15, x14, [x1, #8] +; SDAG-NEXT: extr x12, x9, x8, #1 +; SDAG-NEXT: extr x9, x10, x9, #1 +; SDAG-NEXT: ldr x10, [x1] +; SDAG-NEXT: extr x16, x11, x13, #1 +; SDAG-NEXT: extr x8, x8, x11, #1 +; SDAG-NEXT: stp x12, x9, [x0, #48] +; SDAG-NEXT: extr x9, x14, x15, #1 +; SDAG-NEXT: extr x11, x13, x14, #1 +; SDAG-NEXT: stp x16, x8, [x0, #32] +; SDAG-NEXT: lsl x8, x10, #63 +; SDAG-NEXT: stp x9, x11, [x0, #16] +; SDAG-NEXT: extr x9, x15, x10, #1 +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_63: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldp x11, x12, [x1, #16] +; GISEL-NEXT: ldp x14, x15, [x1, #32] +; GISEL-NEXT: lsr x10, x8, #1 +; GISEL-NEXT: lsr x13, x9, #1 +; GISEL-NEXT: lsl x8, x8, #63 +; GISEL-NEXT: orr x9, x10, x9, lsl #63 +; GISEL-NEXT: lsr x10, x11, #1 +; GISEL-NEXT: orr x11, x13, x11, lsl #63 +; GISEL-NEXT: ldp x13, x16, [x1, #48] +; GISEL-NEXT: stp x8, x9, [x0] +; GISEL-NEXT: lsr x8, x12, #1 +; GISEL-NEXT: orr x10, x10, x12, lsl #63 +; GISEL-NEXT: lsr x12, x14, #1 +; GISEL-NEXT: lsr x9, x15, #1 +; GISEL-NEXT: orr x8, x8, x14, lsl #63 +; GISEL-NEXT: stp x11, x10, [x0, #16] +; GISEL-NEXT: orr x11, x12, x15, lsl #63 +; GISEL-NEXT: lsr x12, x13, #1 +; GISEL-NEXT: orr x9, x9, x13, lsl #63 +; GISEL-NEXT: stp x8, x11, [x0, #32] +; GISEL-NEXT: orr x8, x12, x16, lsl #63 +; GISEL-NEXT: stp x9, x8, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 63 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_63(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_63: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldp x11, x10, [x1, #32] +; SDAG-NEXT: ldp x16, x15, 
[x1, #16] +; SDAG-NEXT: extr x12, x8, x9, #63 +; SDAG-NEXT: lsr x8, x8, #63 +; SDAG-NEXT: ldp x13, x14, [x1] +; SDAG-NEXT: extr x9, x9, x10, #63 +; SDAG-NEXT: stp x12, x8, [x0, #48] +; SDAG-NEXT: extr x12, x10, x11, #63 +; SDAG-NEXT: extr x8, x15, x16, #63 +; SDAG-NEXT: extr x10, x11, x15, #63 +; SDAG-NEXT: stp x12, x9, [x0, #32] +; SDAG-NEXT: extr x9, x14, x13, #63 +; SDAG-NEXT: stp x8, x10, [x0, #16] +; SDAG-NEXT: extr x8, x16, x14, #63 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_63: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x11, [x1] +; GISEL-NEXT: ldp x10, x14, [x1, #24] +; GISEL-NEXT: ldr x16, [x1, #56] +; GISEL-NEXT: lsl x12, x8, #1 +; GISEL-NEXT: lsl x13, x9, #1 +; GISEL-NEXT: lsl x15, x10, #1 +; GISEL-NEXT: orr x11, x12, x11, lsr #63 +; GISEL-NEXT: orr x8, x13, x8, lsr #63 +; GISEL-NEXT: lsl x13, x14, #1 +; GISEL-NEXT: orr x9, x15, x9, lsr #63 +; GISEL-NEXT: ldp x12, x15, [x1, #40] +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: orr x10, x13, x10, lsr #63 +; GISEL-NEXT: lsl x8, x16, #1 +; GISEL-NEXT: lsl x11, x12, #1 +; GISEL-NEXT: lsl x13, x15, #1 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x8, x8, x15, lsr #63 +; GISEL-NEXT: lsr x10, x16, #63 +; GISEL-NEXT: orr x11, x11, x14, lsr #63 +; GISEL-NEXT: orr x9, x13, x12, lsr #63 +; GISEL-NEXT: stp x8, x10, [x0, #48] +; GISEL-NEXT: stp x11, x9, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 63 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_63(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_63: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldp x11, x10, [x1, #32] +; SDAG-NEXT: ldp x16, x15, [x1, #16] +; SDAG-NEXT: extr x12, x8, x9, #63 +; SDAG-NEXT: asr x8, x8, #63 +; SDAG-NEXT: ldp x13, x14, [x1] +; SDAG-NEXT: extr x9, x9, x10, #63 +; SDAG-NEXT: stp 
x12, x8, [x0, #48] +; SDAG-NEXT: extr x12, x10, x11, #63 +; SDAG-NEXT: extr x8, x15, x16, #63 +; SDAG-NEXT: extr x10, x11, x15, #63 +; SDAG-NEXT: stp x12, x9, [x0, #32] +; SDAG-NEXT: extr x9, x14, x13, #63 +; SDAG-NEXT: stp x8, x10, [x0, #16] +; SDAG-NEXT: extr x8, x16, x14, #63 +; SDAG-NEXT: stp x9, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_63: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #8] +; GISEL-NEXT: ldr x10, [x1] +; GISEL-NEXT: ldp x11, x13, [x1, #24] +; GISEL-NEXT: ldr x17, [x1, #56] +; GISEL-NEXT: lsl x15, x9, #1 +; GISEL-NEXT: lsl x12, x8, #1 +; GISEL-NEXT: lsl x16, x11, #1 +; GISEL-NEXT: orr x8, x15, x8, lsr #63 +; GISEL-NEXT: lsl x15, x13, #1 +; GISEL-NEXT: orr x10, x12, x10, lsr #63 +; GISEL-NEXT: ldp x14, x12, [x1, #40] +; GISEL-NEXT: orr x9, x16, x9, lsr #63 +; GISEL-NEXT: orr x11, x15, x11, lsr #63 +; GISEL-NEXT: stp x10, x8, [x0] +; GISEL-NEXT: lsl x8, x17, #1 +; GISEL-NEXT: lsl x16, x14, #1 +; GISEL-NEXT: lsl x10, x12, #1 +; GISEL-NEXT: stp x9, x11, [x0, #16] +; GISEL-NEXT: asr x9, x17, #63 +; GISEL-NEXT: orr x8, x8, x12, lsr #63 +; GISEL-NEXT: orr x13, x16, x13, lsr #63 +; GISEL-NEXT: orr x10, x10, x14, lsr #63 +; GISEL-NEXT: orr x9, x9, x9, lsl #1 +; GISEL-NEXT: stp x13, x10, [x0, #32] +; GISEL-NEXT: stp x8, x9, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 63 + store i512 %shifted, ptr %result, align 64 + ret void +} + +; Mixed word+bit shifts +define void @test_shl_i512_const_65(ptr %result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_65: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #32] +; SDAG-NEXT: ldr x10, [x1, #48] +; SDAG-NEXT: ldp x12, x11, [x1, #16] +; SDAG-NEXT: extr x13, x8, x9, #63 +; SDAG-NEXT: extr x8, x10, x8, #63 +; SDAG-NEXT: ldp x10, x14, [x1] +; SDAG-NEXT: extr x15, x11, x12, #63 +; SDAG-NEXT: stp x13, x8, [x0, #48] +; SDAG-NEXT: extr x9, x9, x11, #63 +; SDAG-NEXT: extr x8, x14, x10, #63 +; 
SDAG-NEXT: extr x11, x12, x14, #63 +; SDAG-NEXT: stp x15, x9, [x0, #32] +; SDAG-NEXT: stp x8, x11, [x0, #16] +; SDAG-NEXT: lsl x8, x10, #1 +; SDAG-NEXT: stp xzr, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_65: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldr x15, [x1, #48] +; GISEL-NEXT: ldp x10, x11, [x1, #16] +; GISEL-NEXT: ldp x12, x13, [x1, #32] +; GISEL-NEXT: lsr x14, x8, #63 +; GISEL-NEXT: lsr x16, x9, #63 +; GISEL-NEXT: lsl x8, x8, #1 +; GISEL-NEXT: orr x9, x14, x9, lsl #1 +; GISEL-NEXT: lsr x14, x10, #63 +; GISEL-NEXT: orr x10, x16, x10, lsl #1 +; GISEL-NEXT: stp xzr, x8, [x0] +; GISEL-NEXT: lsr x8, x11, #63 +; GISEL-NEXT: orr x11, x14, x11, lsl #1 +; GISEL-NEXT: lsr x14, x12, #63 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: lsr x9, x13, #63 +; GISEL-NEXT: orr x8, x8, x12, lsl #1 +; GISEL-NEXT: orr x10, x14, x13, lsl #1 +; GISEL-NEXT: orr x9, x9, x15, lsl #1 +; GISEL-NEXT: stp x11, x8, [x0, #32] +; GISEL-NEXT: stp x10, x9, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 65 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_65(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_65: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr x15, [x1, #24] +; SDAG-NEXT: ldp x14, x10, [x1, #32] +; SDAG-NEXT: ldp x11, x12, [x1, #8] +; SDAG-NEXT: lsr x13, x8, #1 +; SDAG-NEXT: extr x8, x8, x9, #1 +; SDAG-NEXT: extr x16, x9, x10, #1 +; SDAG-NEXT: extr x9, x14, x15, #1 +; SDAG-NEXT: extr x10, x10, x14, #1 +; SDAG-NEXT: stp x13, xzr, [x0, #48] +; SDAG-NEXT: stp x16, x8, [x0, #32] +; SDAG-NEXT: extr x8, x12, x11, #1 +; SDAG-NEXT: stp x9, x10, [x0, #16] +; SDAG-NEXT: extr x9, x15, x12, #1 +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_65: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #16] +; GISEL-NEXT: ldr 
x10, [x1, #8] +; GISEL-NEXT: ldp x11, x14, [x1, #32] +; GISEL-NEXT: ldp x15, x16, [x1, #48] +; GISEL-NEXT: lsl x12, x8, #63 +; GISEL-NEXT: lsl x13, x9, #63 +; GISEL-NEXT: orr x10, x12, x10, lsr #1 +; GISEL-NEXT: lsl x12, x11, #63 +; GISEL-NEXT: orr x8, x13, x8, lsr #1 +; GISEL-NEXT: lsl x13, x14, #63 +; GISEL-NEXT: orr x9, x12, x9, lsr #1 +; GISEL-NEXT: stp x10, x8, [x0] +; GISEL-NEXT: lsl x10, x15, #63 +; GISEL-NEXT: orr x11, x13, x11, lsr #1 +; GISEL-NEXT: lsl x12, x16, #63 +; GISEL-NEXT: orr x8, x10, x14, lsr #1 +; GISEL-NEXT: lsr x10, x16, #1 +; GISEL-NEXT: stp x9, x11, [x0, #16] +; GISEL-NEXT: orr x9, x12, x15, lsr #1 +; GISEL-NEXT: stp x10, xzr, [x0, #48] +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 65 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_65(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_65: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr x16, [x1, #24] +; SDAG-NEXT: ldp x15, x10, [x1, #32] +; SDAG-NEXT: ldp x12, x13, [x1, #8] +; SDAG-NEXT: asr x11, x8, #1 +; SDAG-NEXT: asr x14, x8, #63 +; SDAG-NEXT: extr x8, x8, x9, #1 +; SDAG-NEXT: stp x11, x14, [x0, #48] +; SDAG-NEXT: extr x11, x9, x10, #1 +; SDAG-NEXT: extr x9, x15, x16, #1 +; SDAG-NEXT: extr x10, x10, x15, #1 +; SDAG-NEXT: stp x11, x8, [x0, #32] +; SDAG-NEXT: extr x8, x13, x12, #1 +; SDAG-NEXT: stp x9, x10, [x0, #16] +; SDAG-NEXT: extr x9, x16, x13, #1 +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_65: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #16] +; GISEL-NEXT: ldr x11, [x1, #8] +; GISEL-NEXT: ldp x10, x13, [x1, #32] +; GISEL-NEXT: lsl x12, x8, #63 +; GISEL-NEXT: lsl x14, x9, #63 +; GISEL-NEXT: lsl x15, x10, #63 +; GISEL-NEXT: orr x11, x12, x11, lsr #1 +; GISEL-NEXT: ldp x12, x16, [x1, #48] +; GISEL-NEXT: orr x8, x14, x8, lsr #1 +; 
GISEL-NEXT: lsl x14, x13, #63 +; GISEL-NEXT: orr x9, x15, x9, lsr #1 +; GISEL-NEXT: asr x15, x16, #63 +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: lsl x11, x12, #63 +; GISEL-NEXT: orr x10, x14, x10, lsr #1 +; GISEL-NEXT: lsl x14, x16, #63 +; GISEL-NEXT: orr x8, x11, x13, lsr #1 +; GISEL-NEXT: lsl x11, x15, #63 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x9, x14, x12, lsr #1 +; GISEL-NEXT: orr x10, x11, x16, asr #1 +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: stp x10, x15, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 65 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_shl_i512_const_100(ptr %result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_100: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #32] +; SDAG-NEXT: ldr x10, [x1, #48] +; SDAG-NEXT: ldp x12, x11, [x1, #16] +; SDAG-NEXT: extr x13, x8, x9, #28 +; SDAG-NEXT: extr x8, x10, x8, #28 +; SDAG-NEXT: ldp x10, x14, [x1] +; SDAG-NEXT: extr x15, x11, x12, #28 +; SDAG-NEXT: stp x13, x8, [x0, #48] +; SDAG-NEXT: extr x9, x9, x11, #28 +; SDAG-NEXT: extr x8, x14, x10, #28 +; SDAG-NEXT: extr x11, x12, x14, #28 +; SDAG-NEXT: stp x15, x9, [x0, #32] +; SDAG-NEXT: stp x8, x11, [x0, #16] +; SDAG-NEXT: lsl x8, x10, #36 +; SDAG-NEXT: stp xzr, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_100: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldr x15, [x1, #48] +; GISEL-NEXT: ldp x10, x11, [x1, #16] +; GISEL-NEXT: ldp x12, x13, [x1, #32] +; GISEL-NEXT: lsr x14, x8, #28 +; GISEL-NEXT: lsr x16, x9, #28 +; GISEL-NEXT: lsl x8, x8, #36 +; GISEL-NEXT: orr x9, x14, x9, lsl #36 +; GISEL-NEXT: lsr x14, x10, #28 +; GISEL-NEXT: orr x10, x16, x10, lsl #36 +; GISEL-NEXT: stp xzr, x8, [x0] +; GISEL-NEXT: lsr x8, x11, #28 +; GISEL-NEXT: orr x11, x14, x11, lsl #36 +; GISEL-NEXT: lsr x14, x12, #28 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: lsr x9, x13, 
#28 +; GISEL-NEXT: orr x8, x8, x12, lsl #36 +; GISEL-NEXT: orr x10, x14, x13, lsl #36 +; GISEL-NEXT: orr x9, x9, x15, lsl #36 +; GISEL-NEXT: stp x11, x8, [x0, #32] +; GISEL-NEXT: stp x10, x9, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 100 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_100(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_100: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr x15, [x1, #24] +; SDAG-NEXT: ldp x14, x10, [x1, #32] +; SDAG-NEXT: ldp x11, x12, [x1, #8] +; SDAG-NEXT: lsr x13, x8, #36 +; SDAG-NEXT: extr x8, x8, x9, #36 +; SDAG-NEXT: extr x16, x9, x10, #36 +; SDAG-NEXT: extr x9, x14, x15, #36 +; SDAG-NEXT: extr x10, x10, x14, #36 +; SDAG-NEXT: stp x13, xzr, [x0, #48] +; SDAG-NEXT: stp x16, x8, [x0, #32] +; SDAG-NEXT: extr x8, x12, x11, #36 +; SDAG-NEXT: stp x9, x10, [x0, #16] +; SDAG-NEXT: extr x9, x15, x12, #36 +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_100: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #16] +; GISEL-NEXT: ldr x10, [x1, #8] +; GISEL-NEXT: ldp x11, x14, [x1, #32] +; GISEL-NEXT: ldp x15, x16, [x1, #48] +; GISEL-NEXT: lsl x12, x8, #28 +; GISEL-NEXT: lsl x13, x9, #28 +; GISEL-NEXT: orr x10, x12, x10, lsr #36 +; GISEL-NEXT: lsl x12, x11, #28 +; GISEL-NEXT: orr x8, x13, x8, lsr #36 +; GISEL-NEXT: lsl x13, x14, #28 +; GISEL-NEXT: orr x9, x12, x9, lsr #36 +; GISEL-NEXT: stp x10, x8, [x0] +; GISEL-NEXT: lsl x10, x15, #28 +; GISEL-NEXT: orr x11, x13, x11, lsr #36 +; GISEL-NEXT: lsl x12, x16, #28 +; GISEL-NEXT: orr x8, x10, x14, lsr #36 +; GISEL-NEXT: lsr x10, x16, #36 +; GISEL-NEXT: stp x9, x11, [x0, #16] +; GISEL-NEXT: orr x9, x12, x15, lsr #36 +; GISEL-NEXT: stp x10, xzr, [x0, #48] +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = 
lshr i512 %input_val, 100 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_100(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_100: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr x16, [x1, #24] +; SDAG-NEXT: ldp x15, x10, [x1, #32] +; SDAG-NEXT: ldp x12, x13, [x1, #8] +; SDAG-NEXT: asr x11, x8, #36 +; SDAG-NEXT: asr x14, x8, #63 +; SDAG-NEXT: extr x8, x8, x9, #36 +; SDAG-NEXT: stp x11, x14, [x0, #48] +; SDAG-NEXT: extr x11, x9, x10, #36 +; SDAG-NEXT: extr x9, x15, x16, #36 +; SDAG-NEXT: extr x10, x10, x15, #36 +; SDAG-NEXT: stp x11, x8, [x0, #32] +; SDAG-NEXT: extr x8, x13, x12, #36 +; SDAG-NEXT: stp x9, x10, [x0, #16] +; SDAG-NEXT: extr x9, x16, x13, #36 +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_100: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #16] +; GISEL-NEXT: ldr x11, [x1, #8] +; GISEL-NEXT: ldp x10, x13, [x1, #32] +; GISEL-NEXT: lsl x12, x8, #28 +; GISEL-NEXT: lsl x14, x9, #28 +; GISEL-NEXT: lsl x15, x10, #28 +; GISEL-NEXT: orr x11, x12, x11, lsr #36 +; GISEL-NEXT: ldp x12, x16, [x1, #48] +; GISEL-NEXT: orr x8, x14, x8, lsr #36 +; GISEL-NEXT: lsl x14, x13, #28 +; GISEL-NEXT: orr x9, x15, x9, lsr #36 +; GISEL-NEXT: asr x15, x16, #63 +; GISEL-NEXT: stp x11, x8, [x0] +; GISEL-NEXT: lsl x11, x12, #28 +; GISEL-NEXT: orr x10, x14, x10, lsr #36 +; GISEL-NEXT: lsl x14, x16, #28 +; GISEL-NEXT: orr x8, x11, x13, lsr #36 +; GISEL-NEXT: lsl x11, x15, #28 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: orr x9, x14, x12, lsr #36 +; GISEL-NEXT: orr x10, x11, x16, asr #36 +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: stp x10, x15, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 100 + store i512 %shifted, ptr %result, align 64 + ret void +} + +; Boundary conditions - test exactly at the edge +define void @test_shl_i512_const_127(ptr 
%result, ptr %input) { +; SDAG-LABEL: test_shl_i512_const_127: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #32] +; SDAG-NEXT: ldr x10, [x1, #48] +; SDAG-NEXT: ldp x12, x11, [x1, #16] +; SDAG-NEXT: extr x13, x8, x9, #1 +; SDAG-NEXT: extr x8, x10, x8, #1 +; SDAG-NEXT: ldp x10, x14, [x1] +; SDAG-NEXT: extr x15, x11, x12, #1 +; SDAG-NEXT: stp x13, x8, [x0, #48] +; SDAG-NEXT: extr x9, x9, x11, #1 +; SDAG-NEXT: extr x8, x14, x10, #1 +; SDAG-NEXT: extr x11, x12, x14, #1 +; SDAG-NEXT: stp x15, x9, [x0, #32] +; SDAG-NEXT: stp x8, x11, [x0, #16] +; SDAG-NEXT: lsl x8, x10, #63 +; SDAG-NEXT: stp xzr, x8, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_shl_i512_const_127: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1] +; GISEL-NEXT: ldr x15, [x1, #48] +; GISEL-NEXT: ldp x10, x11, [x1, #16] +; GISEL-NEXT: ldp x12, x13, [x1, #32] +; GISEL-NEXT: lsr x14, x8, #1 +; GISEL-NEXT: lsr x16, x9, #1 +; GISEL-NEXT: lsl x8, x8, #63 +; GISEL-NEXT: orr x9, x14, x9, lsl #63 +; GISEL-NEXT: lsr x14, x10, #1 +; GISEL-NEXT: orr x10, x16, x10, lsl #63 +; GISEL-NEXT: stp xzr, x8, [x0] +; GISEL-NEXT: lsr x8, x11, #1 +; GISEL-NEXT: orr x11, x14, x11, lsl #63 +; GISEL-NEXT: lsr x14, x12, #1 +; GISEL-NEXT: stp x9, x10, [x0, #16] +; GISEL-NEXT: lsr x9, x13, #1 +; GISEL-NEXT: orr x8, x8, x12, lsl #63 +; GISEL-NEXT: orr x10, x14, x13, lsl #63 +; GISEL-NEXT: orr x9, x9, x15, lsl #63 +; GISEL-NEXT: stp x11, x8, [x0, #32] +; GISEL-NEXT: stp x10, x9, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = shl i512 %input_val, 127 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_lshr_i512_const_127(ptr %result, ptr %input) { +; SDAG-LABEL: test_lshr_i512_const_127: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr x15, [x1, #24] +; SDAG-NEXT: ldp x14, x10, [x1, #32] +; SDAG-NEXT: ldp x11, x12, [x1, #8] +; SDAG-NEXT: lsr x13, x8, #63 +; SDAG-NEXT: extr x8, x8, x9, #63 +; SDAG-NEXT: 
extr x16, x9, x10, #63 +; SDAG-NEXT: extr x9, x14, x15, #63 +; SDAG-NEXT: extr x10, x10, x14, #63 +; SDAG-NEXT: stp x13, xzr, [x0, #48] +; SDAG-NEXT: stp x16, x8, [x0, #32] +; SDAG-NEXT: extr x8, x12, x11, #63 +; SDAG-NEXT: stp x9, x10, [x0, #16] +; SDAG-NEXT: extr x9, x15, x12, #63 +; SDAG-NEXT: stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_lshr_i512_const_127: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #16] +; GISEL-NEXT: ldr x10, [x1, #8] +; GISEL-NEXT: ldp x11, x14, [x1, #32] +; GISEL-NEXT: ldp x15, x16, [x1, #48] +; GISEL-NEXT: lsl x12, x8, #1 +; GISEL-NEXT: lsl x13, x9, #1 +; GISEL-NEXT: orr x10, x12, x10, lsr #63 +; GISEL-NEXT: lsl x12, x11, #1 +; GISEL-NEXT: orr x8, x13, x8, lsr #63 +; GISEL-NEXT: lsl x13, x14, #1 +; GISEL-NEXT: orr x9, x12, x9, lsr #63 +; GISEL-NEXT: stp x10, x8, [x0] +; GISEL-NEXT: lsl x10, x15, #1 +; GISEL-NEXT: orr x11, x13, x11, lsr #63 +; GISEL-NEXT: lsl x12, x16, #1 +; GISEL-NEXT: orr x8, x10, x14, lsr #63 +; GISEL-NEXT: lsr x10, x16, #63 +; GISEL-NEXT: stp x9, x11, [x0, #16] +; GISEL-NEXT: orr x9, x12, x15, lsr #63 +; GISEL-NEXT: stp x10, xzr, [x0, #48] +; GISEL-NEXT: stp x8, x9, [x0, #32] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = lshr i512 %input_val, 127 + store i512 %shifted, ptr %result, align 64 + ret void +} + +define void @test_ashr_i512_const_127(ptr %result, ptr %input) { +; SDAG-LABEL: test_ashr_i512_const_127: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: ldp x9, x8, [x1, #48] +; SDAG-NEXT: ldr x15, [x1, #24] +; SDAG-NEXT: ldp x14, x10, [x1, #32] +; SDAG-NEXT: ldp x12, x13, [x1, #8] +; SDAG-NEXT: asr x11, x8, #63 +; SDAG-NEXT: extr x8, x8, x9, #63 +; SDAG-NEXT: extr x16, x9, x10, #63 +; SDAG-NEXT: extr x9, x14, x15, #63 +; SDAG-NEXT: extr x10, x10, x14, #63 +; SDAG-NEXT: stp x11, x11, [x0, #48] +; SDAG-NEXT: stp x16, x8, [x0, #32] +; SDAG-NEXT: extr x8, x13, x12, #63 +; SDAG-NEXT: stp x9, x10, [x0, #16] +; SDAG-NEXT: extr x9, x15, x13, #63 +; SDAG-NEXT: 
stp x8, x9, [x0] +; SDAG-NEXT: ret +; +; GISEL-LABEL: test_ashr_i512_const_127: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: ldp x8, x9, [x1, #16] +; GISEL-NEXT: ldr x10, [x1, #8] +; GISEL-NEXT: ldp x11, x14, [x1, #32] +; GISEL-NEXT: ldp x15, x16, [x1, #48] +; GISEL-NEXT: lsl x12, x8, #1 +; GISEL-NEXT: lsl x13, x9, #1 +; GISEL-NEXT: orr x10, x12, x10, lsr #63 +; GISEL-NEXT: lsl x12, x11, #1 +; GISEL-NEXT: orr x8, x13, x8, lsr #63 +; GISEL-NEXT: lsl x13, x14, #1 +; GISEL-NEXT: orr x9, x12, x9, lsr #63 +; GISEL-NEXT: lsl x12, x15, #1 +; GISEL-NEXT: stp x10, x8, [x0] +; GISEL-NEXT: lsl x10, x16, #1 +; GISEL-NEXT: orr x11, x13, x11, lsr #63 +; GISEL-NEXT: asr x8, x16, #63 +; GISEL-NEXT: orr x12, x12, x14, lsr #63 +; GISEL-NEXT: stp x9, x11, [x0, #16] +; GISEL-NEXT: orr x9, x10, x15, lsr #63 +; GISEL-NEXT: orr x10, x8, x8, lsl #1 +; GISEL-NEXT: stp x12, x9, [x0, #32] +; GISEL-NEXT: stp x10, x8, [x0, #48] +; GISEL-NEXT: ret +entry: + %input_val = load i512, ptr %input, align 64 + %shifted = ashr i512 %input_val, 127 + store i512 %shifted, ptr %result, align 64 + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll index 014b1c1b936ee..ca9f7637388f7 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -2986,610 +2986,836 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: lshr_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 16(a0) -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a5, 18(a0) -; RV32I-NEXT: lbu a6, 19(a0) -; RV32I-NEXT: lbu a7, 20(a0) -; RV32I-NEXT: lbu t0, 21(a0) -; RV32I-NEXT: lbu t1, 22(a0) -; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a6, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 
30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t4, t4, t3 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a5, 1(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a7, 3(a0) +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: lbu t1, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: lbu t4, 8(a0) +; RV32I-NEXT: lbu t5, 9(a0) +; RV32I-NEXT: lbu t6, 10(a0) +; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a7, a7, a6 +; RV32I-NEXT: or t1, t1, t0 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a5, 14(a0) +; RV32I-NEXT: lbu s1, 15(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or t3, t3, t2 +; RV32I-NEXT: or t0, t5, t4 +; RV32I-NEXT: or t5, s0, t6 +; RV32I-NEXT: lbu t2, 1(a1) +; RV32I-NEXT: lbu t4, 0(a1) ; RV32I-NEXT: lbu t6, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: slli t3, t3, 8 -; 
RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: or s0, t2, t4 +; RV32I-NEXT: slli t2, s1, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 -; RV32I-NEXT: li t3, 32 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a4, 16 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli a4, t1, 16 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: or t1, t5, t4 -; RV32I-NEXT: or t5, a4, a7 -; RV32I-NEXT: or a4, t2, t0 -; RV32I-NEXT: slli a4, a4, 3 -; RV32I-NEXT: srl s0, t1, a4 -; RV32I-NEXT: neg s6, a4 -; RV32I-NEXT: sll t4, t5, s6 -; RV32I-NEXT: bltu a4, t3, .LBB12_2 +; RV32I-NEXT: or a1, a1, t6 +; RV32I-NEXT: slli t4, a7, 16 +; RV32I-NEXT: slli a7, t3, 16 +; RV32I-NEXT: slli t3, t5, 16 +; RV32I-NEXT: slli t5, a1, 16 +; RV32I-NEXT: or a1, a7, t1 +; RV32I-NEXT: or a7, t5, s0 +; RV32I-NEXT: slli a7, a7, 3 +; RV32I-NEXT: srli t1, a7, 5 +; RV32I-NEXT: andi t5, a7, 31 +; RV32I-NEXT: neg s3, t5 +; RV32I-NEXT: beqz t5, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a7, t5, a4 -; RV32I-NEXT: j .LBB12_3 +; RV32I-NEXT: sll a4, a1, s3 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: or a7, s0, t4 -; RV32I-NEXT: .LBB12_3: -; RV32I-NEXT: or t0, a6, a3 -; RV32I-NEXT: or a6, a1, a5 -; RV32I-NEXT: mv a1, t1 -; RV32I-NEXT: beqz a4, .LBB12_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a1, a7 +; RV32I-NEXT: or s7, t4, a3 +; RV32I-NEXT: lbu t4, 12(a0) +; RV32I-NEXT: lbu t6, 19(a0) +; RV32I-NEXT: slli s1, a6, 8 +; RV32I-NEXT: or a5, t2, a5 +; RV32I-NEXT: or a3, t3, t0 +; RV32I-NEXT: beqz t1, .LBB12_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: j .LBB12_5 +; RV32I-NEXT: .LBB12_4: +; RV32I-NEXT: srl s0, s7, a7 +; RV32I-NEXT: or s0, s0, a4 ; RV32I-NEXT: .LBB12_5: -; RV32I-NEXT: srl a3, t0, a4 -; RV32I-NEXT: sll a5, a6, s6 -; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t3, .LBB12_7 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu t0, 17(a0) +; RV32I-NEXT: lbu a4, 18(a0) +; RV32I-NEXT: slli s4, t6, 8 +; RV32I-NEXT: or s2, s1, t4 +; RV32I-NEXT: slli a5, a5, 16 +; 
RV32I-NEXT: li s5, 1 +; RV32I-NEXT: sll t6, a3, s3 +; RV32I-NEXT: beqz t5, .LBB12_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li ra, 0 -; RV32I-NEXT: srl a3, a6, a4 -; RV32I-NEXT: j .LBB12_8 +; RV32I-NEXT: mv a6, t6 ; RV32I-NEXT: .LBB12_7: -; RV32I-NEXT: srl ra, t5, a4 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: mv a7, t0 -; RV32I-NEXT: beqz a4, .LBB12_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a7, a3 -; RV32I-NEXT: .LBB12_10: -; RV32I-NEXT: sw a5, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s7, t6, a4 -; RV32I-NEXT: bltu a4, t3, .LBB12_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: j .LBB12_13 -; RV32I-NEXT: .LBB12_12: -; RV32I-NEXT: srl a5, a6, a4 +; RV32I-NEXT: lbu t2, 16(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli s1, t0, 8 +; RV32I-NEXT: or t4, s4, a4 +; RV32I-NEXT: srl a4, a1, a7 +; RV32I-NEXT: or a5, a5, s2 +; RV32I-NEXT: bne t1, s5, .LBB12_9 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: or s0, a4, a6 +; RV32I-NEXT: .LBB12_9: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu s5, 21(a0) +; RV32I-NEXT: lbu a6, 22(a0) +; RV32I-NEXT: slli s4, t3, 8 +; RV32I-NEXT: or t2, s1, t2 +; RV32I-NEXT: slli s6, t4, 16 +; RV32I-NEXT: li s8, 2 +; RV32I-NEXT: sll t3, a5, s3 +; RV32I-NEXT: beqz t5, .LBB12_11 +; RV32I-NEXT: # %bb.10: +; RV32I-NEXT: mv t0, t3 +; RV32I-NEXT: .LBB12_11: +; RV32I-NEXT: lbu s1, 20(a0) +; RV32I-NEXT: lbu s2, 27(a0) +; RV32I-NEXT: slli s5, s5, 8 +; RV32I-NEXT: or s4, s4, a6 +; RV32I-NEXT: srl t4, a3, a7 +; RV32I-NEXT: or a6, s6, t2 +; RV32I-NEXT: bne t1, s8, .LBB12_13 +; RV32I-NEXT: # %bb.12: +; RV32I-NEXT: or s0, t4, t0 ; RV32I-NEXT: .LBB12_13: -; RV32I-NEXT: neg s10, s7 -; RV32I-NEXT: sw t4, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s7, t3, .LBB12_15 -; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: sll a3, t1, s7 -; RV32I-NEXT: j .LBB12_16 +; RV32I-NEXT: lbu s6, 25(a0) +; RV32I-NEXT: lbu t0, 26(a0) +; RV32I-NEXT: 
slli s8, s2, 8 +; RV32I-NEXT: or s7, s5, s1 +; RV32I-NEXT: slli s9, s4, 16 +; RV32I-NEXT: sll s11, a6, s3 +; RV32I-NEXT: beqz t5, .LBB12_15 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: mv t2, s11 ; RV32I-NEXT: .LBB12_15: -; RV32I-NEXT: sll t2, t1, s6 -; RV32I-NEXT: srl a3, t1, s10 -; RV32I-NEXT: or a3, a3, t4 -; RV32I-NEXT: .LBB12_16: -; RV32I-NEXT: addi s9, a4, -64 -; RV32I-NEXT: mv t4, t5 -; RV32I-NEXT: beqz s7, .LBB12_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv t4, a3 -; RV32I-NEXT: .LBB12_18: -; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t3, .LBB12_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: srl s2, t5, s9 -; RV32I-NEXT: j .LBB12_21 -; RV32I-NEXT: .LBB12_20: -; RV32I-NEXT: sll a3, t5, s11 -; RV32I-NEXT: or s2, s0, a3 +; RV32I-NEXT: lbu s1, 24(a0) +; RV32I-NEXT: lbu s2, 31(a0) +; RV32I-NEXT: slli s5, s6, 8 +; RV32I-NEXT: or s4, s8, t0 +; RV32I-NEXT: srl ra, a5, a7 +; RV32I-NEXT: or t0, s9, s7 +; RV32I-NEXT: li s6, 3 +; RV32I-NEXT: bne t1, s6, .LBB12_17 +; RV32I-NEXT: # %bb.16: +; RV32I-NEXT: or s0, ra, t2 +; RV32I-NEXT: .LBB12_17: +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu s7, 29(a0) +; RV32I-NEXT: lbu s6, 30(a0) +; RV32I-NEXT: slli s8, s2, 8 +; RV32I-NEXT: or s2, s5, s1 +; RV32I-NEXT: slli s5, s4, 16 +; RV32I-NEXT: li s9, 4 +; RV32I-NEXT: sll s1, t0, s3 +; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t5, .LBB12_19 +; RV32I-NEXT: # %bb.18: +; RV32I-NEXT: lw t2, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: .LBB12_19: +; RV32I-NEXT: lbu s1, 28(a0) +; RV32I-NEXT: slli s7, s7, 8 +; RV32I-NEXT: or s4, s8, s6 +; RV32I-NEXT: srl s10, a6, a7 +; RV32I-NEXT: or a0, s5, s2 +; RV32I-NEXT: bne t1, s9, .LBB12_21 +; RV32I-NEXT: # %bb.20: +; RV32I-NEXT: or s0, s10, t2 ; RV32I-NEXT: .LBB12_21: -; RV32I-NEXT: lbu s1, 11(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: mv s0, t1 -; RV32I-NEXT: beqz s9, .LBB12_23 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: or t2, s7, s1 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: 
li s9, 5 +; RV32I-NEXT: sll s7, a0, s3 +; RV32I-NEXT: beqz t5, .LBB12_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: mv s2, s7 ; RV32I-NEXT: .LBB12_23: -; RV32I-NEXT: lbu s4, 9(a0) -; RV32I-NEXT: lbu s2, 10(a0) -; RV32I-NEXT: lbu s5, 13(a0) -; RV32I-NEXT: lbu s8, 14(a0) -; RV32I-NEXT: slli s3, s1, 8 -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: sw ra, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t3, .LBB12_25 +; RV32I-NEXT: srl s8, t0, a7 +; RV32I-NEXT: or t2, s4, t2 +; RV32I-NEXT: bne t1, s9, .LBB12_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: j .LBB12_26 +; RV32I-NEXT: or s0, s8, s2 ; RV32I-NEXT: .LBB12_25: -; RV32I-NEXT: srl s1, t5, a4 -; RV32I-NEXT: .LBB12_26: -; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: lbu ra, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s4, s4, 8 -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s8, a3, s8 -; RV32I-NEXT: bgeu a4, t6, .LBB12_28 -; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: or s0, a7, t2 -; RV32I-NEXT: or s1, a5, t4 -; RV32I-NEXT: .LBB12_28: -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu t2, 7(a0) -; RV32I-NEXT: or a5, s4, ra -; RV32I-NEXT: slli t4, s2, 16 -; RV32I-NEXT: or s2, s5, s3 -; RV32I-NEXT: slli s3, s8, 16 -; RV32I-NEXT: mv s4, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a4, .LBB12_30 -; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv s4, s0 -; RV32I-NEXT: mv a7, s1 -; RV32I-NEXT: .LBB12_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; RV32I-NEXT: lbu a3, 2(a0) -; RV32I-NEXT: lbu s1, 5(a0) -; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s8, t2, 8 -; RV32I-NEXT: or t4, t4, a5 -; RV32I-NEXT: or t2, s3, s2 -; RV32I-NEXT: bltu a4, t6, .LBB12_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s2, 6 +; RV32I-NEXT: sll s5, t2, s3 +; RV32I-NEXT: beqz t5, .LBB12_27 +; RV32I-NEXT: # %bb.26: +; RV32I-NEXT: mv s4, s5 +; RV32I-NEXT: .LBB12_27: +; RV32I-NEXT: srl s6, a0, a7 
+; RV32I-NEXT: bne t1, s2, .LBB12_29 +; RV32I-NEXT: # %bb.28: +; RV32I-NEXT: or s0, s6, s4 +; RV32I-NEXT: .LBB12_29: +; RV32I-NEXT: li s3, 7 +; RV32I-NEXT: srl s1, t2, a7 +; RV32I-NEXT: mv s4, s1 +; RV32I-NEXT: bne t1, s3, .LBB12_34 +; RV32I-NEXT: # %bb.30: +; RV32I-NEXT: bnez a7, .LBB12_35 +; RV32I-NEXT: .LBB12_31: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB12_36 ; RV32I-NEXT: .LBB12_32: -; RV32I-NEXT: slli s3, ra, 8 -; RV32I-NEXT: or a5, s5, a3 -; RV32I-NEXT: lbu s5, 0(a0) -; RV32I-NEXT: lbu a0, 4(a0) -; RV32I-NEXT: slli s1, s1, 8 -; RV32I-NEXT: or a3, s8, s0 -; RV32I-NEXT: srl s2, t4, a4 -; RV32I-NEXT: sll ra, t2, s6 -; RV32I-NEXT: bltu a4, t3, .LBB12_34 -; RV32I-NEXT: # %bb.33: -; RV32I-NEXT: srl s0, t2, a4 -; RV32I-NEXT: j .LBB12_35 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: beqz t1, .LBB12_37 +; RV32I-NEXT: .LBB12_33: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB12_38 ; RV32I-NEXT: .LBB12_34: -; RV32I-NEXT: or s0, s2, ra +; RV32I-NEXT: mv s4, s0 +; RV32I-NEXT: beqz a7, .LBB12_31 ; RV32I-NEXT: .LBB12_35: -; RV32I-NEXT: or s3, s3, s5 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: or a0, s1, a0 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: mv s5, t4 -; RV32I-NEXT: beqz a4, .LBB12_37 -; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: mv s5, s0 +; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB12_32 +; RV32I-NEXT: .LBB12_36: +; RV32I-NEXT: mv s0, t6 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: bnez t1, .LBB12_33 ; RV32I-NEXT: .LBB12_37: -; RV32I-NEXT: or s0, a5, s3 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: bltu a4, t3, .LBB12_39 -; RV32I-NEXT: # %bb.38: -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: srl a3, a0, a4 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: bnez a4, .LBB12_40 -; RV32I-NEXT: j .LBB12_41 -; RV32I-NEXT: .LBB12_39: -; RV32I-NEXT: srl s8, t2, a4 -; RV32I-NEXT: srl a3, s0, a4 -; RV32I-NEXT: sll a5, a0, s6 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: beqz a4, .LBB12_41 +; RV32I-NEXT: or a4, a4, s0 +; 
RV32I-NEXT: .LBB12_38: +; RV32I-NEXT: li s0, 1 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB12_57 +; RV32I-NEXT: # %bb.39: +; RV32I-NEXT: beq t1, s0, .LBB12_58 ; RV32I-NEXT: .LBB12_40: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB12_59 ; RV32I-NEXT: .LBB12_41: -; RV32I-NEXT: bltu a4, t3, .LBB12_44 -; RV32I-NEXT: # %bb.42: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: bgeu s7, t3, .LBB12_45 +; RV32I-NEXT: beq t1, s4, .LBB12_60 +; RV32I-NEXT: .LBB12_42: +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB12_61 ; RV32I-NEXT: .LBB12_43: -; RV32I-NEXT: sll s3, t4, s6 -; RV32I-NEXT: srl a3, t4, s10 -; RV32I-NEXT: or a3, a3, ra -; RV32I-NEXT: mv s10, t2 -; RV32I-NEXT: bnez s7, .LBB12_46 -; RV32I-NEXT: j .LBB12_47 +; RV32I-NEXT: li s4, 3 +; RV32I-NEXT: bne t1, s4, .LBB12_45 ; RV32I-NEXT: .LBB12_44: -; RV32I-NEXT: srl s1, a0, a4 -; RV32I-NEXT: bltu s7, t3, .LBB12_43 +; RV32I-NEXT: or a4, s10, t6 ; RV32I-NEXT: .LBB12_45: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t4, s7 -; RV32I-NEXT: mv s10, t2 -; RV32I-NEXT: beqz s7, .LBB12_47 -; RV32I-NEXT: .LBB12_46: -; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s4, 4 +; RV32I-NEXT: bnez t5, .LBB12_62 +; RV32I-NEXT: # %bb.46: +; RV32I-NEXT: beq t1, s4, .LBB12_63 ; RV32I-NEXT: .LBB12_47: -; RV32I-NEXT: bltu s9, t3, .LBB12_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: srl a3, t2, s9 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: bnez s9, .LBB12_50 -; RV32I-NEXT: j .LBB12_51 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB12_64 +; RV32I-NEXT: .LBB12_48: +; RV32I-NEXT: beq t1, s9, .LBB12_65 ; RV32I-NEXT: .LBB12_49: -; RV32I-NEXT: sll a3, t2, s11 -; RV32I-NEXT: or a3, s2, a3 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: beqz s9, .LBB12_51 +; RV32I-NEXT: mv t6, s1 +; RV32I-NEXT: bne t1, s2, .LBB12_66 ; RV32I-NEXT: .LBB12_50: -; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB12_67 ; RV32I-NEXT: .LBB12_51: -; RV32I-NEXT: bltu s9, t3, .LBB12_53 -; RV32I-NEXT: # %bb.52: -; 
RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bltu a4, t6, .LBB12_54 -; RV32I-NEXT: j .LBB12_55 +; RV32I-NEXT: beqz a7, .LBB12_53 +; RV32I-NEXT: .LBB12_52: +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: .LBB12_53: -; RV32I-NEXT: srl s7, t2, a4 -; RV32I-NEXT: bgeu a4, t6, .LBB12_55 -; RV32I-NEXT: .LBB12_54: -; RV32I-NEXT: or s2, a5, s3 -; RV32I-NEXT: or s7, s1, s10 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t6, 2 +; RV32I-NEXT: beqz t5, .LBB12_55 +; RV32I-NEXT: # %bb.54: +; RV32I-NEXT: mv a4, t3 ; RV32I-NEXT: .LBB12_55: -; RV32I-NEXT: li a3, 128 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: beqz a4, .LBB12_57 +; RV32I-NEXT: beqz t1, .LBB12_68 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv a5, s2 -; RV32I-NEXT: mv s1, s7 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB12_69 ; RV32I-NEXT: .LBB12_57: -; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s2, a3, a4 -; RV32I-NEXT: bltu a4, t6, .LBB12_59 -; RV32I-NEXT: # %bb.58: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s8, 0 +; RV32I-NEXT: mv t6, t3 +; RV32I-NEXT: bne t1, s0, .LBB12_40 +; RV32I-NEXT: .LBB12_58: +; RV32I-NEXT: or a4, t4, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB12_41 ; RV32I-NEXT: .LBB12_59: -; RV32I-NEXT: neg s3, s2 -; RV32I-NEXT: srl a5, t0, s3 -; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t3, .LBB12_61 -; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: li s10, 0 -; RV32I-NEXT: sll a3, t0, s2 -; RV32I-NEXT: j .LBB12_62 +; RV32I-NEXT: mv t6, s11 +; RV32I-NEXT: bne t1, s4, .LBB12_42 +; RV32I-NEXT: .LBB12_60: +; RV32I-NEXT: or a4, ra, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB12_43 ; RV32I-NEXT: .LBB12_61: -; RV32I-NEXT: sll s10, t0, s6 -; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lw t6, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: li s4, 3 +; RV32I-NEXT: beq t1, s4, .LBB12_44 +; RV32I-NEXT: j .LBB12_45 ; RV32I-NEXT: .LBB12_62: -; RV32I-NEXT: sub s1, t6, 
s2 -; RV32I-NEXT: mv s8, a6 -; RV32I-NEXT: beqz s2, .LBB12_64 -; RV32I-NEXT: # %bb.63: -; RV32I-NEXT: mv s8, a3 +; RV32I-NEXT: mv t6, s7 +; RV32I-NEXT: bne t1, s4, .LBB12_47 +; RV32I-NEXT: .LBB12_63: +; RV32I-NEXT: or a4, s8, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB12_48 ; RV32I-NEXT: .LBB12_64: -; RV32I-NEXT: bltu s1, t3, .LBB12_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: srl a3, a6, s1 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: bnez s1, .LBB12_67 -; RV32I-NEXT: j .LBB12_68 +; RV32I-NEXT: mv t6, s5 +; RV32I-NEXT: bne t1, s9, .LBB12_49 +; RV32I-NEXT: .LBB12_65: +; RV32I-NEXT: or a4, s6, t6 +; RV32I-NEXT: mv t6, s1 +; RV32I-NEXT: beq t1, s2, .LBB12_50 ; RV32I-NEXT: .LBB12_66: -; RV32I-NEXT: neg a3, s1 -; RV32I-NEXT: sll a3, a6, a3 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: beqz s1, .LBB12_68 +; RV32I-NEXT: mv t6, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB12_51 ; RV32I-NEXT: .LBB12_67: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a4, t6 +; RV32I-NEXT: bnez a7, .LBB12_52 +; RV32I-NEXT: j .LBB12_53 ; RV32I-NEXT: .LBB12_68: -; RV32I-NEXT: bltu s1, t3, .LBB12_71 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s2, t3, .LBB12_72 -; RV32I-NEXT: .LBB12_70: -; RV32I-NEXT: sll s6, t1, s6 -; RV32I-NEXT: srl a3, t1, s3 -; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB12_73 +; RV32I-NEXT: or a4, t4, a4 +; RV32I-NEXT: .LBB12_69: +; RV32I-NEXT: li t4, 3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_84 +; RV32I-NEXT: # %bb.70: +; RV32I-NEXT: beq t1, s0, .LBB12_85 ; RV32I-NEXT: .LBB12_71: -; RV32I-NEXT: srl s1, a6, s3 -; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t3, .LBB12_70 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_86 ; RV32I-NEXT: .LBB12_72: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: sll a3, t1, s2 +; RV32I-NEXT: beq t1, t6, .LBB12_87 ; RV32I-NEXT: .LBB12_73: 
-; RV32I-NEXT: addi s9, s2, -64 -; RV32I-NEXT: mv s5, t5 -; RV32I-NEXT: beqz s2, .LBB12_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_88 +; RV32I-NEXT: .LBB12_74: +; RV32I-NEXT: beq t1, t4, .LBB12_89 ; RV32I-NEXT: .LBB12_75: -; RV32I-NEXT: bltu s9, t3, .LBB12_77 -; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t0, s9 -; RV32I-NEXT: mv s7, a6 -; RV32I-NEXT: bnez s9, .LBB12_78 -; RV32I-NEXT: j .LBB12_79 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_90 +; RV32I-NEXT: .LBB12_76: +; RV32I-NEXT: beq t1, s4, .LBB12_91 ; RV32I-NEXT: .LBB12_77: -; RV32I-NEXT: sll s3, t0, s2 -; RV32I-NEXT: neg a3, s9 -; RV32I-NEXT: srl a3, t0, a3 -; RV32I-NEXT: sll s7, a6, s2 -; RV32I-NEXT: or a3, a3, s7 -; RV32I-NEXT: mv s7, a6 -; RV32I-NEXT: beqz s9, .LBB12_79 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s9, .LBB12_92 ; RV32I-NEXT: .LBB12_78: -; RV32I-NEXT: mv s7, a3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB12_93 ; RV32I-NEXT: .LBB12_79: -; RV32I-NEXT: bltu s2, t6, .LBB12_81 -; RV32I-NEXT: # %bb.80: -; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: j .LBB12_82 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB12_94 +; RV32I-NEXT: .LBB12_80: +; RV32I-NEXT: bnez a7, .LBB12_95 ; RV32I-NEXT: .LBB12_81: -; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s3, a5, s6 -; RV32I-NEXT: or s7, s1, s5 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB12_96 ; RV32I-NEXT: .LBB12_82: -; RV32I-NEXT: addi ra, a4, -128 -; RV32I-NEXT: mv s5, t1 -; RV32I-NEXT: mv s6, t5 -; RV32I-NEXT: beqz s2, .LBB12_84 -; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: mv s5, s3 -; RV32I-NEXT: mv s6, s7 +; RV32I-NEXT: beqz t1, .LBB12_97 +; RV32I-NEXT: .LBB12_83: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB12_98 ; RV32I-NEXT: .LBB12_84: -; RV32I-NEXT: neg s9, ra -; RV32I-NEXT: sll s3, t5, s9 -; RV32I-NEXT: bltu ra, t3, .LBB12_86 -; RV32I-NEXT: # %bb.85: -; 
RV32I-NEXT: srl a3, t5, ra -; RV32I-NEXT: mv s1, t1 -; RV32I-NEXT: bnez ra, .LBB12_87 -; RV32I-NEXT: j .LBB12_88 +; RV32I-NEXT: mv t3, s11 +; RV32I-NEXT: bne t1, s0, .LBB12_71 +; RV32I-NEXT: .LBB12_85: +; RV32I-NEXT: or a4, ra, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_72 ; RV32I-NEXT: .LBB12_86: -; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: mv s1, t1 -; RV32I-NEXT: beqz ra, .LBB12_88 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, t6, .LBB12_73 ; RV32I-NEXT: .LBB12_87: -; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: or a4, s10, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_74 ; RV32I-NEXT: .LBB12_88: -; RV32I-NEXT: bltu ra, t3, .LBB12_90 -; RV32I-NEXT: # %bb.89: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: srl a3, a6, ra -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: bnez ra, .LBB12_91 -; RV32I-NEXT: j .LBB12_92 +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bne t1, t4, .LBB12_75 +; RV32I-NEXT: .LBB12_89: +; RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_76 ; RV32I-NEXT: .LBB12_90: -; RV32I-NEXT: srl s2, t5, a4 -; RV32I-NEXT: sll a3, a6, s9 -; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: beqz ra, .LBB12_92 +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne t1, s4, .LBB12_77 ; RV32I-NEXT: .LBB12_91: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, s9, .LBB12_78 ; RV32I-NEXT: .LBB12_92: -; RV32I-NEXT: sub s10, t6, ra -; RV32I-NEXT: bltu ra, t3, .LBB12_95 -; RV32I-NEXT: # %bb.93: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bgeu s10, t3, .LBB12_96 +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB12_79 +; RV32I-NEXT: .LBB12_93: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s3, .LBB12_80 ; RV32I-NEXT: .LBB12_94: -; RV32I-NEXT: sll s9, t1, s9 -; RV32I-NEXT: neg a3, s10 -; RV32I-NEXT: srl a3, t1, 
a3 -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB12_97 +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB12_81 ; RV32I-NEXT: .LBB12_95: -; RV32I-NEXT: srl s7, a6, a4 -; RV32I-NEXT: bltu s10, t3, .LBB12_94 +; RV32I-NEXT: mv a3, t3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB12_82 ; RV32I-NEXT: .LBB12_96: -; RV32I-NEXT: li s9, 0 -; RV32I-NEXT: sll a3, t1, s10 +; RV32I-NEXT: mv a4, s11 +; RV32I-NEXT: bnez t1, .LBB12_83 ; RV32I-NEXT: .LBB12_97: -; RV32I-NEXT: addi s11, ra, -64 -; RV32I-NEXT: mv s3, t5 -; RV32I-NEXT: beqz s10, .LBB12_99 -; RV32I-NEXT: # %bb.98: -; RV32I-NEXT: mv s3, a3 -; RV32I-NEXT: .LBB12_99: -; RV32I-NEXT: bltu s11, t3, .LBB12_101 -; RV32I-NEXT: # %bb.100: -; RV32I-NEXT: srl a3, t5, s11 -; RV32I-NEXT: bnez s11, .LBB12_102 -; RV32I-NEXT: j .LBB12_103 +; RV32I-NEXT: or a4, ra, a4 +; RV32I-NEXT: .LBB12_98: +; RV32I-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_112 +; RV32I-NEXT: # %bb.99: +; RV32I-NEXT: beq t1, s0, .LBB12_113 +; RV32I-NEXT: .LBB12_100: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_114 ; RV32I-NEXT: .LBB12_101: -; RV32I-NEXT: srl a3, t1, ra -; RV32I-NEXT: neg s10, s11 -; RV32I-NEXT: sll s10, t5, s10 -; RV32I-NEXT: or a3, a3, s10 -; RV32I-NEXT: beqz s11, .LBB12_103 +; RV32I-NEXT: beq t1, t6, .LBB12_115 ; RV32I-NEXT: .LBB12_102: -; RV32I-NEXT: mv t1, a3 -; RV32I-NEXT: .LBB12_103: -; RV32I-NEXT: bltu s11, t3, .LBB12_105 -; RV32I-NEXT: # %bb.104: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bltu ra, t6, .LBB12_106 -; RV32I-NEXT: j .LBB12_107 +; RV32I-NEXT: bnez t5, .LBB12_116 +; RV32I-NEXT: .LBB12_103: +; RV32I-NEXT: beq t1, t4, .LBB12_117 +; RV32I-NEXT: .LBB12_104: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s4, .LBB12_118 ; RV32I-NEXT: .LBB12_105: -; RV32I-NEXT: srl t3, t5, ra -; RV32I-NEXT: bgeu ra, t6, .LBB12_107 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB12_119 ; RV32I-NEXT: .LBB12_106: -; RV32I-NEXT: or t1, a5, s9 -; RV32I-NEXT: or t3, s7, s3 +; 
RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB12_120 ; RV32I-NEXT: .LBB12_107: -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: bnez ra, .LBB12_114 -; RV32I-NEXT: # %bb.108: -; RV32I-NEXT: bgeu ra, t6, .LBB12_115 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB12_121 +; RV32I-NEXT: .LBB12_108: +; RV32I-NEXT: bnez a7, .LBB12_122 ; RV32I-NEXT: .LBB12_109: -; RV32I-NEXT: bltu a4, a5, .LBB12_116 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB12_123 ; RV32I-NEXT: .LBB12_110: -; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a4, .LBB12_117 +; RV32I-NEXT: beqz t1, .LBB12_124 ; RV32I-NEXT: .LBB12_111: -; RV32I-NEXT: bltu a4, a5, .LBB12_113 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_125 +; RV32I-NEXT: j .LBB12_126 ; RV32I-NEXT: .LBB12_112: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: li ra, 0 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s0, .LBB12_100 ; RV32I-NEXT: .LBB12_113: -; RV32I-NEXT: srli a4, s0, 16 -; RV32I-NEXT: lui t1, 16 -; RV32I-NEXT: srli t0, s0, 24 -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli t5, a0, 24 -; RV32I-NEXT: srli a6, t4, 16 -; RV32I-NEXT: srli s2, t4, 24 -; RV32I-NEXT: srli t3, t2, 16 -; RV32I-NEXT: srli s3, t2, 24 -; RV32I-NEXT: srli s1, s4, 16 -; RV32I-NEXT: srli a3, s4, 24 -; RV32I-NEXT: srli t6, a7, 16 -; RV32I-NEXT: srli s6, a7, 24 -; RV32I-NEXT: srli s5, a1, 16 -; RV32I-NEXT: srli s7, a1, 24 -; RV32I-NEXT: srli s8, ra, 16 -; RV32I-NEXT: srli s9, ra, 24 -; RV32I-NEXT: addi t1, t1, -1 -; RV32I-NEXT: and s10, s0, t1 -; RV32I-NEXT: and s11, a0, t1 +; RV32I-NEXT: or a4, s10, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_101 +; RV32I-NEXT: .LBB12_114: +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bne t1, t6, .LBB12_102 +; RV32I-NEXT: .LBB12_115: +; RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_103 +; RV32I-NEXT: .LBB12_116: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne 
t1, t4, .LBB12_104 +; RV32I-NEXT: .LBB12_117: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, s4, .LBB12_105 +; RV32I-NEXT: .LBB12_118: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB12_106 +; RV32I-NEXT: .LBB12_119: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s2, .LBB12_107 +; RV32I-NEXT: .LBB12_120: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB12_108 +; RV32I-NEXT: .LBB12_121: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a7, .LBB12_109 +; RV32I-NEXT: .LBB12_122: +; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB12_110 +; RV32I-NEXT: .LBB12_123: +; RV32I-NEXT: lw a4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t1, .LBB12_111 +; RV32I-NEXT: .LBB12_124: +; RV32I-NEXT: or a4, s10, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_126 +; RV32I-NEXT: .LBB12_125: +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: .LBB12_126: +; RV32I-NEXT: beq t1, s0, .LBB12_138 +; RV32I-NEXT: # %bb.127: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_139 +; RV32I-NEXT: .LBB12_128: +; RV32I-NEXT: beq t1, t6, .LBB12_140 +; RV32I-NEXT: .LBB12_129: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, t4, .LBB12_141 +; RV32I-NEXT: .LBB12_130: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s4, .LBB12_142 +; RV32I-NEXT: .LBB12_131: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s9, .LBB12_143 +; RV32I-NEXT: .LBB12_132: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB12_144 +; RV32I-NEXT: .LBB12_133: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB12_145 +; RV32I-NEXT: .LBB12_134: +; RV32I-NEXT: bnez a7, .LBB12_146 +; RV32I-NEXT: .LBB12_135: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB12_147 +; RV32I-NEXT: .LBB12_136: +; RV32I-NEXT: beqz t1, .LBB12_148 +; RV32I-NEXT: .LBB12_137: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB12_149 +; RV32I-NEXT: j .LBB12_150 +; RV32I-NEXT: .LBB12_138: +; 
RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_128 +; RV32I-NEXT: .LBB12_139: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne t1, t6, .LBB12_129 +; RV32I-NEXT: .LBB12_140: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, t4, .LBB12_130 +; RV32I-NEXT: .LBB12_141: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s4, .LBB12_131 +; RV32I-NEXT: .LBB12_142: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s9, .LBB12_132 +; RV32I-NEXT: .LBB12_143: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB12_133 +; RV32I-NEXT: .LBB12_144: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s3, .LBB12_134 +; RV32I-NEXT: .LBB12_145: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB12_135 +; RV32I-NEXT: .LBB12_146: +; RV32I-NEXT: mv a6, t3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB12_136 +; RV32I-NEXT: .LBB12_147: +; RV32I-NEXT: mv a4, s7 +; RV32I-NEXT: bnez t1, .LBB12_137 +; RV32I-NEXT: .LBB12_148: +; RV32I-NEXT: or a4, s8, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB12_150 +; RV32I-NEXT: .LBB12_149: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: .LBB12_150: +; RV32I-NEXT: beq t1, s0, .LBB12_161 +; RV32I-NEXT: # %bb.151: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, t6, .LBB12_162 +; RV32I-NEXT: .LBB12_152: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t4, .LBB12_163 +; RV32I-NEXT: .LBB12_153: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s4, .LBB12_164 +; RV32I-NEXT: .LBB12_154: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB12_165 +; RV32I-NEXT: .LBB12_155: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB12_166 +; RV32I-NEXT: .LBB12_156: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB12_167 +; RV32I-NEXT: .LBB12_157: +; RV32I-NEXT: bnez a7, .LBB12_168 +; RV32I-NEXT: .LBB12_158: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB12_169 +; RV32I-NEXT: .LBB12_159: +; RV32I-NEXT: beqz t1, 
.LBB12_170 +; RV32I-NEXT: .LBB12_160: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s0, .LBB12_171 +; RV32I-NEXT: j .LBB12_172 +; RV32I-NEXT: .LBB12_161: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, t6, .LBB12_152 +; RV32I-NEXT: .LBB12_162: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, t4, .LBB12_153 +; RV32I-NEXT: .LBB12_163: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s4, .LBB12_154 +; RV32I-NEXT: .LBB12_164: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB12_155 +; RV32I-NEXT: .LBB12_165: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s2, .LBB12_156 +; RV32I-NEXT: .LBB12_166: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB12_157 +; RV32I-NEXT: .LBB12_167: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a7, .LBB12_158 +; RV32I-NEXT: .LBB12_168: +; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB12_159 +; RV32I-NEXT: .LBB12_169: +; RV32I-NEXT: mv a4, s5 +; RV32I-NEXT: bnez t1, .LBB12_160 +; RV32I-NEXT: .LBB12_170: +; RV32I-NEXT: or a4, s6, a4 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, s0, .LBB12_172 +; RV32I-NEXT: .LBB12_171: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: .LBB12_172: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t6, .LBB12_190 +; RV32I-NEXT: # %bb.173: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, t4, .LBB12_191 +; RV32I-NEXT: .LBB12_174: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s4, .LBB12_192 +; RV32I-NEXT: .LBB12_175: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s9, .LBB12_193 +; RV32I-NEXT: .LBB12_176: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB12_194 +; RV32I-NEXT: .LBB12_177: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB12_195 +; RV32I-NEXT: .LBB12_178: +; RV32I-NEXT: bnez a7, .LBB12_196 +; RV32I-NEXT: .LBB12_179: +; RV32I-NEXT: bnez t1, .LBB12_197 +; RV32I-NEXT: .LBB12_180: +; RV32I-NEXT: li 
a4, 0 +; RV32I-NEXT: bne t1, s0, .LBB12_198 +; RV32I-NEXT: .LBB12_181: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, t6, .LBB12_199 +; RV32I-NEXT: .LBB12_182: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t4, .LBB12_200 +; RV32I-NEXT: .LBB12_183: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s4, .LBB12_201 +; RV32I-NEXT: .LBB12_184: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB12_202 +; RV32I-NEXT: .LBB12_185: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB12_203 +; RV32I-NEXT: .LBB12_186: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB12_204 +; RV32I-NEXT: .LBB12_187: +; RV32I-NEXT: beqz a7, .LBB12_189 +; RV32I-NEXT: .LBB12_188: +; RV32I-NEXT: mv t2, a4 +; RV32I-NEXT: .LBB12_189: +; RV32I-NEXT: srli a4, ra, 16 +; RV32I-NEXT: lui t4, 16 +; RV32I-NEXT: srli t3, ra, 24 +; RV32I-NEXT: srli a7, a1, 16 +; RV32I-NEXT: srli t6, a1, 24 +; RV32I-NEXT: srli t1, a3, 16 +; RV32I-NEXT: srli s2, a3, 24 +; RV32I-NEXT: srli t5, a5, 16 +; RV32I-NEXT: srli s3, a5, 24 +; RV32I-NEXT: srli s1, a6, 16 +; RV32I-NEXT: srli s6, a6, 24 +; RV32I-NEXT: srli s0, t0, 16 +; RV32I-NEXT: srli s5, t0, 24 +; RV32I-NEXT: srli s4, a0, 16 +; RV32I-NEXT: srli s7, a0, 24 +; RV32I-NEXT: srli s8, t2, 16 +; RV32I-NEXT: srli s9, t2, 24 +; RV32I-NEXT: addi t4, t4, -1 +; RV32I-NEXT: and s10, ra, t4 +; RV32I-NEXT: and s11, a1, t4 ; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb s0, 0(a2) +; RV32I-NEXT: sb ra, 0(a2) ; RV32I-NEXT: sb s10, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb t0, 3(a2) -; RV32I-NEXT: and a4, t4, t1 -; RV32I-NEXT: srli t0, s11, 8 -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: sb t0, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t5, 7(a2) -; RV32I-NEXT: and a0, t2, t1 +; RV32I-NEXT: sb t3, 3(a2) +; RV32I-NEXT: and a4, a3, t4 +; RV32I-NEXT: srli t3, s11, 8 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb t3, 5(a2) +; RV32I-NEXT: sb a7, 6(a2) +; RV32I-NEXT: sb t6, 7(a2) +; RV32I-NEXT: and a1, a5, t4 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t4, 8(a2) +; 
RV32I-NEXT: sb a3, 8(a2) ; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb a6, 10(a2) +; RV32I-NEXT: sb t1, 10(a2) ; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, s4, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb t2, 12(a2) -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb t3, 14(a2) +; RV32I-NEXT: and a3, a6, t4 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a5, 12(a2) +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: sb t5, 14(a2) ; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a0, a7, t1 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s4, 16(a2) -; RV32I-NEXT: sb a4, 17(a2) +; RV32I-NEXT: and a1, t0, t4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a6, 16(a2) +; RV32I-NEXT: sb a3, 17(a2) ; RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a3, a1, t1 -; RV32I-NEXT: and a4, ra, t1 -; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb s6, 19(a2) +; RV32I-NEXT: and a3, a0, t4 +; RV32I-NEXT: and a4, t2, t4 +; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 20(a2) -; RV32I-NEXT: sb a0, 21(a2) -; RV32I-NEXT: sb t6, 22(a2) -; RV32I-NEXT: sb s6, 23(a2) -; RV32I-NEXT: sb a1, 24(a2) +; RV32I-NEXT: sb t0, 20(a2) +; RV32I-NEXT: sb a1, 21(a2) +; RV32I-NEXT: sb s0, 22(a2) +; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s5, 26(a2) +; RV32I-NEXT: sb s4, 26(a2) ; RV32I-NEXT: sb s7, 27(a2) -; RV32I-NEXT: sb ra, 28(a2) +; RV32I-NEXT: sb t2, 28(a2) ; RV32I-NEXT: sb a4, 29(a2) ; RV32I-NEXT: sb s8, 30(a2) ; RV32I-NEXT: sb s9, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw 
s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB12_114: -; RV32I-NEXT: mv t0, t1 -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: bltu ra, t6, .LBB12_109 -; RV32I-NEXT: .LBB12_115: +; RV32I-NEXT: .LBB12_190: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, t4, .LBB12_174 +; RV32I-NEXT: .LBB12_191: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s4, .LBB12_175 +; RV32I-NEXT: .LBB12_192: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s9, .LBB12_176 +; RV32I-NEXT: .LBB12_193: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB12_177 +; RV32I-NEXT: .LBB12_194: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s3, .LBB12_178 +; RV32I-NEXT: .LBB12_195: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB12_179 +; RV32I-NEXT: .LBB12_196: +; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: beqz t1, .LBB12_180 +; RV32I-NEXT: .LBB12_197: ; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: bgeu a4, a5, 
.LBB12_110 -; RV32I-NEXT: .LBB12_116: -; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t0, a3, a6 -; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a6, a3, s8 -; RV32I-NEXT: lw a3, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s1, a3, s5 -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s2, a3, s6 -; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a4, .LBB12_111 -; RV32I-NEXT: .LBB12_117: -; RV32I-NEXT: mv s0, t0 -; RV32I-NEXT: mv a0, a6 -; RV32I-NEXT: mv t4, s1 -; RV32I-NEXT: mv t2, s2 -; RV32I-NEXT: bgeu a4, a5, .LBB12_112 -; RV32I-NEXT: j .LBB12_113 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s0, .LBB12_181 +; RV32I-NEXT: .LBB12_198: +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, t6, .LBB12_182 +; RV32I-NEXT: .LBB12_199: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, t4, .LBB12_183 +; RV32I-NEXT: .LBB12_200: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s4, .LBB12_184 +; RV32I-NEXT: .LBB12_201: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB12_185 +; RV32I-NEXT: .LBB12_202: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s2, .LBB12_186 +; RV32I-NEXT: .LBB12_203: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB12_187 +; RV32I-NEXT: .LBB12_204: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: bnez a7, .LBB12_188 +; RV32I-NEXT: j .LBB12_189 %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -3910,610 +4136,836 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; ; RV32I-LABEL: lshr_32bytes_wordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 
84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 16(a0) -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a5, 18(a0) -; RV32I-NEXT: lbu a6, 19(a0) -; RV32I-NEXT: lbu a7, 20(a0) -; RV32I-NEXT: lbu t0, 21(a0) -; RV32I-NEXT: lbu t1, 22(a0) -; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a6, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t4, t4, t3 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t3, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill 
+; RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a5, 1(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a7, 3(a0) +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: lbu t1, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: lbu t4, 8(a0) +; RV32I-NEXT: lbu t5, 9(a0) +; RV32I-NEXT: lbu t6, 10(a0) +; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: or a7, a7, a6 +; RV32I-NEXT: or t1, t1, t0 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a5, 14(a0) +; RV32I-NEXT: lbu s1, 15(a0) ; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or t3, t3, t2 +; RV32I-NEXT: or t0, t5, t4 +; RV32I-NEXT: or t5, s0, t6 +; RV32I-NEXT: lbu t2, 1(a1) +; RV32I-NEXT: lbu t4, 0(a1) +; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or s0, t2, t4 +; RV32I-NEXT: slli t2, s1, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 -; RV32I-NEXT: li t3, 32 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a4, 16 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli a4, t1, 16 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: or t1, t5, t4 -; RV32I-NEXT: or t5, a4, a7 -; RV32I-NEXT: or a4, t2, t0 -; RV32I-NEXT: slli a4, a4, 5 -; RV32I-NEXT: srl s0, t1, a4 -; RV32I-NEXT: neg s6, a4 -; RV32I-NEXT: sll t4, t5, s6 -; RV32I-NEXT: bltu a4, t3, .LBB13_2 +; RV32I-NEXT: or a1, a1, t6 +; RV32I-NEXT: slli t4, a7, 16 +; RV32I-NEXT: slli a7, t3, 16 +; RV32I-NEXT: slli t3, t5, 16 +; RV32I-NEXT: slli t5, a1, 16 +; 
RV32I-NEXT: or a1, a7, t1 +; RV32I-NEXT: or a7, t5, s0 +; RV32I-NEXT: slli a7, a7, 5 +; RV32I-NEXT: srli t1, a7, 5 +; RV32I-NEXT: andi t5, a7, 31 +; RV32I-NEXT: neg s3, t5 +; RV32I-NEXT: beqz t5, .LBB13_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a7, t5, a4 -; RV32I-NEXT: j .LBB13_3 +; RV32I-NEXT: sll a4, a1, s3 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: or a7, s0, t4 -; RV32I-NEXT: .LBB13_3: -; RV32I-NEXT: or t0, a6, a3 -; RV32I-NEXT: or a6, a1, a5 -; RV32I-NEXT: mv a1, t1 -; RV32I-NEXT: beqz a4, .LBB13_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a1, a7 +; RV32I-NEXT: or s7, t4, a3 +; RV32I-NEXT: lbu t4, 12(a0) +; RV32I-NEXT: lbu t6, 19(a0) +; RV32I-NEXT: slli s1, a6, 8 +; RV32I-NEXT: or a5, t2, a5 +; RV32I-NEXT: or a3, t3, t0 +; RV32I-NEXT: beqz t1, .LBB13_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: j .LBB13_5 +; RV32I-NEXT: .LBB13_4: +; RV32I-NEXT: srl s0, s7, a7 +; RV32I-NEXT: or s0, s0, a4 ; RV32I-NEXT: .LBB13_5: -; RV32I-NEXT: srl a3, t0, a4 -; RV32I-NEXT: sll a5, a6, s6 -; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t3, .LBB13_7 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu t0, 17(a0) +; RV32I-NEXT: lbu a4, 18(a0) +; RV32I-NEXT: slli s4, t6, 8 +; RV32I-NEXT: or s2, s1, t4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: li s5, 1 +; RV32I-NEXT: sll t6, a3, s3 +; RV32I-NEXT: beqz t5, .LBB13_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li ra, 0 -; RV32I-NEXT: srl a3, a6, a4 -; RV32I-NEXT: j .LBB13_8 +; RV32I-NEXT: mv a6, t6 ; RV32I-NEXT: .LBB13_7: -; RV32I-NEXT: srl ra, t5, a4 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: .LBB13_8: -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: mv a7, t0 -; RV32I-NEXT: beqz a4, .LBB13_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a7, a3 -; RV32I-NEXT: .LBB13_10: -; RV32I-NEXT: sw a5, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s7, t6, a4 -; RV32I-NEXT: bltu a4, t3, .LBB13_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: j .LBB13_13 -; RV32I-NEXT: .LBB13_12: -; RV32I-NEXT: srl a5, 
a6, a4 +; RV32I-NEXT: lbu t2, 16(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli s1, t0, 8 +; RV32I-NEXT: or t4, s4, a4 +; RV32I-NEXT: srl a4, a1, a7 +; RV32I-NEXT: or a5, a5, s2 +; RV32I-NEXT: bne t1, s5, .LBB13_9 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: or s0, a4, a6 +; RV32I-NEXT: .LBB13_9: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu s5, 21(a0) +; RV32I-NEXT: lbu a6, 22(a0) +; RV32I-NEXT: slli s4, t3, 8 +; RV32I-NEXT: or t2, s1, t2 +; RV32I-NEXT: slli s6, t4, 16 +; RV32I-NEXT: li s8, 2 +; RV32I-NEXT: sll t3, a5, s3 +; RV32I-NEXT: beqz t5, .LBB13_11 +; RV32I-NEXT: # %bb.10: +; RV32I-NEXT: mv t0, t3 +; RV32I-NEXT: .LBB13_11: +; RV32I-NEXT: lbu s1, 20(a0) +; RV32I-NEXT: lbu s2, 27(a0) +; RV32I-NEXT: slli s5, s5, 8 +; RV32I-NEXT: or s4, s4, a6 +; RV32I-NEXT: srl t4, a3, a7 +; RV32I-NEXT: or a6, s6, t2 +; RV32I-NEXT: bne t1, s8, .LBB13_13 +; RV32I-NEXT: # %bb.12: +; RV32I-NEXT: or s0, t4, t0 ; RV32I-NEXT: .LBB13_13: -; RV32I-NEXT: neg s10, s7 -; RV32I-NEXT: sw t4, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s7, t3, .LBB13_15 -; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: sll a3, t1, s7 -; RV32I-NEXT: j .LBB13_16 +; RV32I-NEXT: lbu s6, 25(a0) +; RV32I-NEXT: lbu t0, 26(a0) +; RV32I-NEXT: slli s8, s2, 8 +; RV32I-NEXT: or s7, s5, s1 +; RV32I-NEXT: slli s9, s4, 16 +; RV32I-NEXT: sll s11, a6, s3 +; RV32I-NEXT: beqz t5, .LBB13_15 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: mv t2, s11 ; RV32I-NEXT: .LBB13_15: -; RV32I-NEXT: sll t2, t1, s6 -; RV32I-NEXT: srl a3, t1, s10 -; RV32I-NEXT: or a3, a3, t4 -; RV32I-NEXT: .LBB13_16: -; RV32I-NEXT: addi s9, a4, -64 -; RV32I-NEXT: mv t4, t5 -; RV32I-NEXT: beqz s7, .LBB13_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv t4, a3 -; RV32I-NEXT: .LBB13_18: -; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t3, .LBB13_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: srl s2, t5, s9 -; RV32I-NEXT: j .LBB13_21 -; RV32I-NEXT: .LBB13_20: 
-; RV32I-NEXT: sll a3, t5, s11 -; RV32I-NEXT: or s2, s0, a3 +; RV32I-NEXT: lbu s1, 24(a0) +; RV32I-NEXT: lbu s2, 31(a0) +; RV32I-NEXT: slli s5, s6, 8 +; RV32I-NEXT: or s4, s8, t0 +; RV32I-NEXT: srl ra, a5, a7 +; RV32I-NEXT: or t0, s9, s7 +; RV32I-NEXT: li s6, 3 +; RV32I-NEXT: bne t1, s6, .LBB13_17 +; RV32I-NEXT: # %bb.16: +; RV32I-NEXT: or s0, ra, t2 +; RV32I-NEXT: .LBB13_17: +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu s7, 29(a0) +; RV32I-NEXT: lbu s6, 30(a0) +; RV32I-NEXT: slli s8, s2, 8 +; RV32I-NEXT: or s2, s5, s1 +; RV32I-NEXT: slli s5, s4, 16 +; RV32I-NEXT: li s9, 4 +; RV32I-NEXT: sll s1, t0, s3 +; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t5, .LBB13_19 +; RV32I-NEXT: # %bb.18: +; RV32I-NEXT: lw t2, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: .LBB13_19: +; RV32I-NEXT: lbu s1, 28(a0) +; RV32I-NEXT: slli s7, s7, 8 +; RV32I-NEXT: or s4, s8, s6 +; RV32I-NEXT: srl s10, a6, a7 +; RV32I-NEXT: or a0, s5, s2 +; RV32I-NEXT: bne t1, s9, .LBB13_21 +; RV32I-NEXT: # %bb.20: +; RV32I-NEXT: or s0, s10, t2 ; RV32I-NEXT: .LBB13_21: -; RV32I-NEXT: lbu s1, 11(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: mv s0, t1 -; RV32I-NEXT: beqz s9, .LBB13_23 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: or t2, s7, s1 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: li s9, 5 +; RV32I-NEXT: sll s7, a0, s3 +; RV32I-NEXT: beqz t5, .LBB13_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: mv s2, s7 ; RV32I-NEXT: .LBB13_23: -; RV32I-NEXT: lbu s4, 9(a0) -; RV32I-NEXT: lbu s2, 10(a0) -; RV32I-NEXT: lbu s5, 13(a0) -; RV32I-NEXT: lbu s8, 14(a0) -; RV32I-NEXT: slli s3, s1, 8 -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: sw ra, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t3, .LBB13_25 +; RV32I-NEXT: srl s8, t0, a7 +; RV32I-NEXT: or t2, s4, t2 +; RV32I-NEXT: bne t1, s9, .LBB13_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: j .LBB13_26 +; RV32I-NEXT: or s0, s8, s2 ; RV32I-NEXT: .LBB13_25: -; RV32I-NEXT: srl s1, t5, a4 -; RV32I-NEXT: .LBB13_26: 
-; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: lbu ra, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s4, s4, 8 -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s8, a3, s8 -; RV32I-NEXT: bgeu a4, t6, .LBB13_28 -; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: or s0, a7, t2 -; RV32I-NEXT: or s1, a5, t4 -; RV32I-NEXT: .LBB13_28: -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu t2, 7(a0) -; RV32I-NEXT: or a5, s4, ra -; RV32I-NEXT: slli t4, s2, 16 -; RV32I-NEXT: or s2, s5, s3 -; RV32I-NEXT: slli s3, s8, 16 -; RV32I-NEXT: mv s4, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a4, .LBB13_30 -; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv s4, s0 -; RV32I-NEXT: mv a7, s1 -; RV32I-NEXT: .LBB13_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; RV32I-NEXT: lbu a3, 2(a0) -; RV32I-NEXT: lbu s1, 5(a0) -; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s8, t2, 8 -; RV32I-NEXT: or t4, t4, a5 -; RV32I-NEXT: or t2, s3, s2 -; RV32I-NEXT: bltu a4, t6, .LBB13_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s2, 6 +; RV32I-NEXT: sll s5, t2, s3 +; RV32I-NEXT: beqz t5, .LBB13_27 +; RV32I-NEXT: # %bb.26: +; RV32I-NEXT: mv s4, s5 +; RV32I-NEXT: .LBB13_27: +; RV32I-NEXT: srl s6, a0, a7 +; RV32I-NEXT: bne t1, s2, .LBB13_29 +; RV32I-NEXT: # %bb.28: +; RV32I-NEXT: or s0, s6, s4 +; RV32I-NEXT: .LBB13_29: +; RV32I-NEXT: li s3, 7 +; RV32I-NEXT: srl s1, t2, a7 +; RV32I-NEXT: mv s4, s1 +; RV32I-NEXT: bne t1, s3, .LBB13_34 +; RV32I-NEXT: # %bb.30: +; RV32I-NEXT: bnez a7, .LBB13_35 +; RV32I-NEXT: .LBB13_31: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB13_36 ; RV32I-NEXT: .LBB13_32: -; RV32I-NEXT: slli s3, ra, 8 -; RV32I-NEXT: or a5, s5, a3 -; RV32I-NEXT: lbu s5, 0(a0) -; RV32I-NEXT: lbu a0, 4(a0) -; RV32I-NEXT: slli s1, s1, 8 -; RV32I-NEXT: or a3, s8, s0 -; RV32I-NEXT: srl s2, t4, a4 -; RV32I-NEXT: sll ra, t2, s6 -; RV32I-NEXT: bltu a4, t3, .LBB13_34 -; RV32I-NEXT: # %bb.33: -; RV32I-NEXT: srl 
s0, t2, a4 -; RV32I-NEXT: j .LBB13_35 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: beqz t1, .LBB13_37 +; RV32I-NEXT: .LBB13_33: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB13_38 ; RV32I-NEXT: .LBB13_34: -; RV32I-NEXT: or s0, s2, ra +; RV32I-NEXT: mv s4, s0 +; RV32I-NEXT: beqz a7, .LBB13_31 ; RV32I-NEXT: .LBB13_35: -; RV32I-NEXT: or s3, s3, s5 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: or a0, s1, a0 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: mv s5, t4 -; RV32I-NEXT: beqz a4, .LBB13_37 -; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: mv s5, s0 +; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB13_32 +; RV32I-NEXT: .LBB13_36: +; RV32I-NEXT: mv s0, t6 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: bnez t1, .LBB13_33 ; RV32I-NEXT: .LBB13_37: -; RV32I-NEXT: or s0, a5, s3 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: bltu a4, t3, .LBB13_39 -; RV32I-NEXT: # %bb.38: -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: srl a3, a0, a4 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: bnez a4, .LBB13_40 -; RV32I-NEXT: j .LBB13_41 -; RV32I-NEXT: .LBB13_39: -; RV32I-NEXT: srl s8, t2, a4 -; RV32I-NEXT: srl a3, s0, a4 -; RV32I-NEXT: sll a5, a0, s6 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: beqz a4, .LBB13_41 +; RV32I-NEXT: or a4, a4, s0 +; RV32I-NEXT: .LBB13_38: +; RV32I-NEXT: li s0, 1 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB13_57 +; RV32I-NEXT: # %bb.39: +; RV32I-NEXT: beq t1, s0, .LBB13_58 ; RV32I-NEXT: .LBB13_40: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB13_59 ; RV32I-NEXT: .LBB13_41: -; RV32I-NEXT: bltu a4, t3, .LBB13_44 -; RV32I-NEXT: # %bb.42: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: bgeu s7, t3, .LBB13_45 +; RV32I-NEXT: beq t1, s4, .LBB13_60 +; RV32I-NEXT: .LBB13_42: +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB13_61 ; RV32I-NEXT: .LBB13_43: -; RV32I-NEXT: sll s3, t4, s6 -; RV32I-NEXT: srl a3, t4, s10 -; RV32I-NEXT: or a3, a3, ra -; RV32I-NEXT: mv s10, t2 -; RV32I-NEXT: bnez s7, .LBB13_46 
-; RV32I-NEXT: j .LBB13_47 +; RV32I-NEXT: li s4, 3 +; RV32I-NEXT: bne t1, s4, .LBB13_45 ; RV32I-NEXT: .LBB13_44: -; RV32I-NEXT: srl s1, a0, a4 -; RV32I-NEXT: bltu s7, t3, .LBB13_43 +; RV32I-NEXT: or a4, s10, t6 ; RV32I-NEXT: .LBB13_45: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t4, s7 -; RV32I-NEXT: mv s10, t2 -; RV32I-NEXT: beqz s7, .LBB13_47 -; RV32I-NEXT: .LBB13_46: -; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s4, 4 +; RV32I-NEXT: bnez t5, .LBB13_62 +; RV32I-NEXT: # %bb.46: +; RV32I-NEXT: beq t1, s4, .LBB13_63 ; RV32I-NEXT: .LBB13_47: -; RV32I-NEXT: bltu s9, t3, .LBB13_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: srl a3, t2, s9 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: bnez s9, .LBB13_50 -; RV32I-NEXT: j .LBB13_51 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB13_64 +; RV32I-NEXT: .LBB13_48: +; RV32I-NEXT: beq t1, s9, .LBB13_65 ; RV32I-NEXT: .LBB13_49: -; RV32I-NEXT: sll a3, t2, s11 -; RV32I-NEXT: or a3, s2, a3 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: beqz s9, .LBB13_51 +; RV32I-NEXT: mv t6, s1 +; RV32I-NEXT: bne t1, s2, .LBB13_66 ; RV32I-NEXT: .LBB13_50: -; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB13_67 ; RV32I-NEXT: .LBB13_51: -; RV32I-NEXT: bltu s9, t3, .LBB13_53 -; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bltu a4, t6, .LBB13_54 -; RV32I-NEXT: j .LBB13_55 +; RV32I-NEXT: beqz a7, .LBB13_53 +; RV32I-NEXT: .LBB13_52: +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: .LBB13_53: -; RV32I-NEXT: srl s7, t2, a4 -; RV32I-NEXT: bgeu a4, t6, .LBB13_55 -; RV32I-NEXT: .LBB13_54: -; RV32I-NEXT: or s2, a5, s3 -; RV32I-NEXT: or s7, s1, s10 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t6, 2 +; RV32I-NEXT: beqz t5, .LBB13_55 +; RV32I-NEXT: # %bb.54: +; RV32I-NEXT: mv a4, t3 ; RV32I-NEXT: .LBB13_55: -; RV32I-NEXT: li a3, 128 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: beqz a4, .LBB13_57 +; RV32I-NEXT: beqz t1, .LBB13_68 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv a5, s2 -; RV32I-NEXT: mv s1, 
s7 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB13_69 ; RV32I-NEXT: .LBB13_57: -; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s2, a3, a4 -; RV32I-NEXT: bltu a4, t6, .LBB13_59 -; RV32I-NEXT: # %bb.58: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s8, 0 +; RV32I-NEXT: mv t6, t3 +; RV32I-NEXT: bne t1, s0, .LBB13_40 +; RV32I-NEXT: .LBB13_58: +; RV32I-NEXT: or a4, t4, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB13_41 ; RV32I-NEXT: .LBB13_59: -; RV32I-NEXT: neg s3, s2 -; RV32I-NEXT: srl a5, t0, s3 -; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t3, .LBB13_61 -; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: li s10, 0 -; RV32I-NEXT: sll a3, t0, s2 -; RV32I-NEXT: j .LBB13_62 +; RV32I-NEXT: mv t6, s11 +; RV32I-NEXT: bne t1, s4, .LBB13_42 +; RV32I-NEXT: .LBB13_60: +; RV32I-NEXT: or a4, ra, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB13_43 ; RV32I-NEXT: .LBB13_61: -; RV32I-NEXT: sll s10, t0, s6 -; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lw t6, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: li s4, 3 +; RV32I-NEXT: beq t1, s4, .LBB13_44 +; RV32I-NEXT: j .LBB13_45 ; RV32I-NEXT: .LBB13_62: -; RV32I-NEXT: sub s1, t6, s2 -; RV32I-NEXT: mv s8, a6 -; RV32I-NEXT: beqz s2, .LBB13_64 -; RV32I-NEXT: # %bb.63: -; RV32I-NEXT: mv s8, a3 +; RV32I-NEXT: mv t6, s7 +; RV32I-NEXT: bne t1, s4, .LBB13_47 +; RV32I-NEXT: .LBB13_63: +; RV32I-NEXT: or a4, s8, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB13_48 ; RV32I-NEXT: .LBB13_64: -; RV32I-NEXT: bltu s1, t3, .LBB13_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: srl a3, a6, s1 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: bnez s1, .LBB13_67 -; RV32I-NEXT: j .LBB13_68 +; RV32I-NEXT: mv t6, s5 +; RV32I-NEXT: bne t1, s9, .LBB13_49 +; RV32I-NEXT: .LBB13_65: +; RV32I-NEXT: or a4, s6, t6 +; RV32I-NEXT: mv t6, s1 +; RV32I-NEXT: beq t1, s2, .LBB13_50 ; RV32I-NEXT: .LBB13_66: -; RV32I-NEXT: neg a3, s1 
-; RV32I-NEXT: sll a3, a6, a3 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: beqz s1, .LBB13_68 +; RV32I-NEXT: mv t6, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB13_51 ; RV32I-NEXT: .LBB13_67: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a4, t6 +; RV32I-NEXT: bnez a7, .LBB13_52 +; RV32I-NEXT: j .LBB13_53 ; RV32I-NEXT: .LBB13_68: -; RV32I-NEXT: bltu s1, t3, .LBB13_71 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s2, t3, .LBB13_72 -; RV32I-NEXT: .LBB13_70: -; RV32I-NEXT: sll s6, t1, s6 -; RV32I-NEXT: srl a3, t1, s3 -; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB13_73 +; RV32I-NEXT: or a4, t4, a4 +; RV32I-NEXT: .LBB13_69: +; RV32I-NEXT: li t4, 3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_84 +; RV32I-NEXT: # %bb.70: +; RV32I-NEXT: beq t1, s0, .LBB13_85 ; RV32I-NEXT: .LBB13_71: -; RV32I-NEXT: srl s1, a6, s3 -; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t3, .LBB13_70 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_86 ; RV32I-NEXT: .LBB13_72: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: sll a3, t1, s2 +; RV32I-NEXT: beq t1, t6, .LBB13_87 ; RV32I-NEXT: .LBB13_73: -; RV32I-NEXT: addi s9, s2, -64 -; RV32I-NEXT: mv s5, t5 -; RV32I-NEXT: beqz s2, .LBB13_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_88 +; RV32I-NEXT: .LBB13_74: +; RV32I-NEXT: beq t1, t4, .LBB13_89 ; RV32I-NEXT: .LBB13_75: -; RV32I-NEXT: bltu s9, t3, .LBB13_77 -; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t0, s9 -; RV32I-NEXT: mv s7, a6 -; RV32I-NEXT: bnez s9, .LBB13_78 -; RV32I-NEXT: j .LBB13_79 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_90 +; RV32I-NEXT: .LBB13_76: +; RV32I-NEXT: beq t1, s4, .LBB13_91 ; RV32I-NEXT: .LBB13_77: -; RV32I-NEXT: sll s3, t0, s2 -; RV32I-NEXT: neg a3, s9 -; RV32I-NEXT: srl a3, t0, a3 
-; RV32I-NEXT: sll s7, a6, s2 -; RV32I-NEXT: or a3, a3, s7 -; RV32I-NEXT: mv s7, a6 -; RV32I-NEXT: beqz s9, .LBB13_79 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s9, .LBB13_92 ; RV32I-NEXT: .LBB13_78: -; RV32I-NEXT: mv s7, a3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB13_93 ; RV32I-NEXT: .LBB13_79: -; RV32I-NEXT: bltu s2, t6, .LBB13_81 -; RV32I-NEXT: # %bb.80: -; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: j .LBB13_82 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB13_94 +; RV32I-NEXT: .LBB13_80: +; RV32I-NEXT: bnez a7, .LBB13_95 ; RV32I-NEXT: .LBB13_81: -; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s3, a5, s6 -; RV32I-NEXT: or s7, s1, s5 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB13_96 ; RV32I-NEXT: .LBB13_82: -; RV32I-NEXT: addi ra, a4, -128 -; RV32I-NEXT: mv s5, t1 -; RV32I-NEXT: mv s6, t5 -; RV32I-NEXT: beqz s2, .LBB13_84 -; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: mv s5, s3 -; RV32I-NEXT: mv s6, s7 +; RV32I-NEXT: beqz t1, .LBB13_97 +; RV32I-NEXT: .LBB13_83: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB13_98 ; RV32I-NEXT: .LBB13_84: -; RV32I-NEXT: neg s9, ra -; RV32I-NEXT: sll s3, t5, s9 -; RV32I-NEXT: bltu ra, t3, .LBB13_86 -; RV32I-NEXT: # %bb.85: -; RV32I-NEXT: srl a3, t5, ra -; RV32I-NEXT: mv s1, t1 -; RV32I-NEXT: bnez ra, .LBB13_87 -; RV32I-NEXT: j .LBB13_88 +; RV32I-NEXT: mv t3, s11 +; RV32I-NEXT: bne t1, s0, .LBB13_71 +; RV32I-NEXT: .LBB13_85: +; RV32I-NEXT: or a4, ra, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_72 ; RV32I-NEXT: .LBB13_86: -; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: mv s1, t1 -; RV32I-NEXT: beqz ra, .LBB13_88 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, t6, .LBB13_73 ; RV32I-NEXT: .LBB13_87: -; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: or a4, s10, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_74 ; RV32I-NEXT: .LBB13_88: -; RV32I-NEXT: bltu 
ra, t3, .LBB13_90 -; RV32I-NEXT: # %bb.89: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: srl a3, a6, ra -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: bnez ra, .LBB13_91 -; RV32I-NEXT: j .LBB13_92 +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bne t1, t4, .LBB13_75 +; RV32I-NEXT: .LBB13_89: +; RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_76 ; RV32I-NEXT: .LBB13_90: -; RV32I-NEXT: srl s2, t5, a4 -; RV32I-NEXT: sll a3, a6, s9 -; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: beqz ra, .LBB13_92 +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne t1, s4, .LBB13_77 ; RV32I-NEXT: .LBB13_91: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, s9, .LBB13_78 ; RV32I-NEXT: .LBB13_92: -; RV32I-NEXT: sub s10, t6, ra -; RV32I-NEXT: bltu ra, t3, .LBB13_95 -; RV32I-NEXT: # %bb.93: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bgeu s10, t3, .LBB13_96 +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB13_79 +; RV32I-NEXT: .LBB13_93: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s3, .LBB13_80 ; RV32I-NEXT: .LBB13_94: -; RV32I-NEXT: sll s9, t1, s9 -; RV32I-NEXT: neg a3, s10 -; RV32I-NEXT: srl a3, t1, a3 -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB13_97 +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB13_81 ; RV32I-NEXT: .LBB13_95: -; RV32I-NEXT: srl s7, a6, a4 -; RV32I-NEXT: bltu s10, t3, .LBB13_94 +; RV32I-NEXT: mv a3, t3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB13_82 ; RV32I-NEXT: .LBB13_96: -; RV32I-NEXT: li s9, 0 -; RV32I-NEXT: sll a3, t1, s10 +; RV32I-NEXT: mv a4, s11 +; RV32I-NEXT: bnez t1, .LBB13_83 ; RV32I-NEXT: .LBB13_97: -; RV32I-NEXT: addi s11, ra, -64 -; RV32I-NEXT: mv s3, t5 -; RV32I-NEXT: beqz s10, .LBB13_99 -; RV32I-NEXT: # %bb.98: -; RV32I-NEXT: mv s3, a3 -; RV32I-NEXT: .LBB13_99: -; RV32I-NEXT: bltu s11, t3, .LBB13_101 -; RV32I-NEXT: # %bb.100: -; RV32I-NEXT: srl a3, t5, 
s11 -; RV32I-NEXT: bnez s11, .LBB13_102 -; RV32I-NEXT: j .LBB13_103 +; RV32I-NEXT: or a4, ra, a4 +; RV32I-NEXT: .LBB13_98: +; RV32I-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_112 +; RV32I-NEXT: # %bb.99: +; RV32I-NEXT: beq t1, s0, .LBB13_113 +; RV32I-NEXT: .LBB13_100: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_114 ; RV32I-NEXT: .LBB13_101: -; RV32I-NEXT: srl a3, t1, ra -; RV32I-NEXT: neg s10, s11 -; RV32I-NEXT: sll s10, t5, s10 -; RV32I-NEXT: or a3, a3, s10 -; RV32I-NEXT: beqz s11, .LBB13_103 +; RV32I-NEXT: beq t1, t6, .LBB13_115 ; RV32I-NEXT: .LBB13_102: -; RV32I-NEXT: mv t1, a3 -; RV32I-NEXT: .LBB13_103: -; RV32I-NEXT: bltu s11, t3, .LBB13_105 -; RV32I-NEXT: # %bb.104: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bltu ra, t6, .LBB13_106 -; RV32I-NEXT: j .LBB13_107 +; RV32I-NEXT: bnez t5, .LBB13_116 +; RV32I-NEXT: .LBB13_103: +; RV32I-NEXT: beq t1, t4, .LBB13_117 +; RV32I-NEXT: .LBB13_104: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s4, .LBB13_118 ; RV32I-NEXT: .LBB13_105: -; RV32I-NEXT: srl t3, t5, ra -; RV32I-NEXT: bgeu ra, t6, .LBB13_107 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB13_119 ; RV32I-NEXT: .LBB13_106: -; RV32I-NEXT: or t1, a5, s9 -; RV32I-NEXT: or t3, s7, s3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB13_120 ; RV32I-NEXT: .LBB13_107: -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: bnez ra, .LBB13_114 -; RV32I-NEXT: # %bb.108: -; RV32I-NEXT: bgeu ra, t6, .LBB13_115 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB13_121 +; RV32I-NEXT: .LBB13_108: +; RV32I-NEXT: bnez a7, .LBB13_122 ; RV32I-NEXT: .LBB13_109: -; RV32I-NEXT: bltu a4, a5, .LBB13_116 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB13_123 ; RV32I-NEXT: .LBB13_110: -; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a4, .LBB13_117 +; RV32I-NEXT: beqz t1, .LBB13_124 ; RV32I-NEXT: .LBB13_111: -; RV32I-NEXT: bltu a4, a5, .LBB13_113 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 +; 
RV32I-NEXT: bnez t5, .LBB13_125 +; RV32I-NEXT: j .LBB13_126 ; RV32I-NEXT: .LBB13_112: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: li ra, 0 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s0, .LBB13_100 ; RV32I-NEXT: .LBB13_113: -; RV32I-NEXT: srli a4, s0, 16 -; RV32I-NEXT: lui t1, 16 -; RV32I-NEXT: srli t0, s0, 24 -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli t5, a0, 24 -; RV32I-NEXT: srli a6, t4, 16 -; RV32I-NEXT: srli s2, t4, 24 -; RV32I-NEXT: srli t3, t2, 16 -; RV32I-NEXT: srli s3, t2, 24 -; RV32I-NEXT: srli s1, s4, 16 -; RV32I-NEXT: srli a3, s4, 24 -; RV32I-NEXT: srli t6, a7, 16 -; RV32I-NEXT: srli s6, a7, 24 -; RV32I-NEXT: srli s5, a1, 16 -; RV32I-NEXT: srli s7, a1, 24 -; RV32I-NEXT: srli s8, ra, 16 -; RV32I-NEXT: srli s9, ra, 24 -; RV32I-NEXT: addi t1, t1, -1 -; RV32I-NEXT: and s10, s0, t1 -; RV32I-NEXT: and s11, a0, t1 +; RV32I-NEXT: or a4, s10, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_101 +; RV32I-NEXT: .LBB13_114: +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bne t1, t6, .LBB13_102 +; RV32I-NEXT: .LBB13_115: +; RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_103 +; RV32I-NEXT: .LBB13_116: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne t1, t4, .LBB13_104 +; RV32I-NEXT: .LBB13_117: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, s4, .LBB13_105 +; RV32I-NEXT: .LBB13_118: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB13_106 +; RV32I-NEXT: .LBB13_119: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s2, .LBB13_107 +; RV32I-NEXT: .LBB13_120: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB13_108 +; RV32I-NEXT: .LBB13_121: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a7, .LBB13_109 +; RV32I-NEXT: .LBB13_122: +; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB13_110 +; RV32I-NEXT: .LBB13_123: +; RV32I-NEXT: lw 
a4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t1, .LBB13_111 +; RV32I-NEXT: .LBB13_124: +; RV32I-NEXT: or a4, s10, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_126 +; RV32I-NEXT: .LBB13_125: +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: .LBB13_126: +; RV32I-NEXT: beq t1, s0, .LBB13_138 +; RV32I-NEXT: # %bb.127: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_139 +; RV32I-NEXT: .LBB13_128: +; RV32I-NEXT: beq t1, t6, .LBB13_140 +; RV32I-NEXT: .LBB13_129: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, t4, .LBB13_141 +; RV32I-NEXT: .LBB13_130: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s4, .LBB13_142 +; RV32I-NEXT: .LBB13_131: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s9, .LBB13_143 +; RV32I-NEXT: .LBB13_132: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB13_144 +; RV32I-NEXT: .LBB13_133: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB13_145 +; RV32I-NEXT: .LBB13_134: +; RV32I-NEXT: bnez a7, .LBB13_146 +; RV32I-NEXT: .LBB13_135: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB13_147 +; RV32I-NEXT: .LBB13_136: +; RV32I-NEXT: beqz t1, .LBB13_148 +; RV32I-NEXT: .LBB13_137: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB13_149 +; RV32I-NEXT: j .LBB13_150 +; RV32I-NEXT: .LBB13_138: +; RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_128 +; RV32I-NEXT: .LBB13_139: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne t1, t6, .LBB13_129 +; RV32I-NEXT: .LBB13_140: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, t4, .LBB13_130 +; RV32I-NEXT: .LBB13_141: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s4, .LBB13_131 +; RV32I-NEXT: .LBB13_142: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s9, .LBB13_132 +; RV32I-NEXT: .LBB13_143: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB13_133 +; RV32I-NEXT: .LBB13_144: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; 
RV32I-NEXT: beq t1, s3, .LBB13_134 +; RV32I-NEXT: .LBB13_145: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB13_135 +; RV32I-NEXT: .LBB13_146: +; RV32I-NEXT: mv a6, t3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB13_136 +; RV32I-NEXT: .LBB13_147: +; RV32I-NEXT: mv a4, s7 +; RV32I-NEXT: bnez t1, .LBB13_137 +; RV32I-NEXT: .LBB13_148: +; RV32I-NEXT: or a4, s8, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB13_150 +; RV32I-NEXT: .LBB13_149: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: .LBB13_150: +; RV32I-NEXT: beq t1, s0, .LBB13_161 +; RV32I-NEXT: # %bb.151: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, t6, .LBB13_162 +; RV32I-NEXT: .LBB13_152: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t4, .LBB13_163 +; RV32I-NEXT: .LBB13_153: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s4, .LBB13_164 +; RV32I-NEXT: .LBB13_154: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB13_165 +; RV32I-NEXT: .LBB13_155: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB13_166 +; RV32I-NEXT: .LBB13_156: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB13_167 +; RV32I-NEXT: .LBB13_157: +; RV32I-NEXT: bnez a7, .LBB13_168 +; RV32I-NEXT: .LBB13_158: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB13_169 +; RV32I-NEXT: .LBB13_159: +; RV32I-NEXT: beqz t1, .LBB13_170 +; RV32I-NEXT: .LBB13_160: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s0, .LBB13_171 +; RV32I-NEXT: j .LBB13_172 +; RV32I-NEXT: .LBB13_161: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, t6, .LBB13_152 +; RV32I-NEXT: .LBB13_162: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, t4, .LBB13_153 +; RV32I-NEXT: .LBB13_163: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s4, .LBB13_154 +; RV32I-NEXT: .LBB13_164: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB13_155 +; RV32I-NEXT: .LBB13_165: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, 
s2, .LBB13_156 +; RV32I-NEXT: .LBB13_166: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB13_157 +; RV32I-NEXT: .LBB13_167: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a7, .LBB13_158 +; RV32I-NEXT: .LBB13_168: +; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB13_159 +; RV32I-NEXT: .LBB13_169: +; RV32I-NEXT: mv a4, s5 +; RV32I-NEXT: bnez t1, .LBB13_160 +; RV32I-NEXT: .LBB13_170: +; RV32I-NEXT: or a4, s6, a4 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, s0, .LBB13_172 +; RV32I-NEXT: .LBB13_171: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: .LBB13_172: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t6, .LBB13_190 +; RV32I-NEXT: # %bb.173: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, t4, .LBB13_191 +; RV32I-NEXT: .LBB13_174: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s4, .LBB13_192 +; RV32I-NEXT: .LBB13_175: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s9, .LBB13_193 +; RV32I-NEXT: .LBB13_176: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB13_194 +; RV32I-NEXT: .LBB13_177: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB13_195 +; RV32I-NEXT: .LBB13_178: +; RV32I-NEXT: bnez a7, .LBB13_196 +; RV32I-NEXT: .LBB13_179: +; RV32I-NEXT: bnez t1, .LBB13_197 +; RV32I-NEXT: .LBB13_180: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s0, .LBB13_198 +; RV32I-NEXT: .LBB13_181: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, t6, .LBB13_199 +; RV32I-NEXT: .LBB13_182: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t4, .LBB13_200 +; RV32I-NEXT: .LBB13_183: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s4, .LBB13_201 +; RV32I-NEXT: .LBB13_184: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB13_202 +; RV32I-NEXT: .LBB13_185: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB13_203 +; RV32I-NEXT: .LBB13_186: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB13_204 +; RV32I-NEXT: .LBB13_187: +; RV32I-NEXT: beqz a7, .LBB13_189 +; RV32I-NEXT: .LBB13_188: +; RV32I-NEXT: mv t2, a4 +; RV32I-NEXT: 
.LBB13_189: +; RV32I-NEXT: srli a4, ra, 16 +; RV32I-NEXT: lui t4, 16 +; RV32I-NEXT: srli t3, ra, 24 +; RV32I-NEXT: srli a7, a1, 16 +; RV32I-NEXT: srli t6, a1, 24 +; RV32I-NEXT: srli t1, a3, 16 +; RV32I-NEXT: srli s2, a3, 24 +; RV32I-NEXT: srli t5, a5, 16 +; RV32I-NEXT: srli s3, a5, 24 +; RV32I-NEXT: srli s1, a6, 16 +; RV32I-NEXT: srli s6, a6, 24 +; RV32I-NEXT: srli s0, t0, 16 +; RV32I-NEXT: srli s5, t0, 24 +; RV32I-NEXT: srli s4, a0, 16 +; RV32I-NEXT: srli s7, a0, 24 +; RV32I-NEXT: srli s8, t2, 16 +; RV32I-NEXT: srli s9, t2, 24 +; RV32I-NEXT: addi t4, t4, -1 +; RV32I-NEXT: and s10, ra, t4 +; RV32I-NEXT: and s11, a1, t4 ; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb s0, 0(a2) +; RV32I-NEXT: sb ra, 0(a2) ; RV32I-NEXT: sb s10, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb t0, 3(a2) -; RV32I-NEXT: and a4, t4, t1 -; RV32I-NEXT: srli t0, s11, 8 -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: sb t0, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t5, 7(a2) -; RV32I-NEXT: and a0, t2, t1 +; RV32I-NEXT: sb t3, 3(a2) +; RV32I-NEXT: and a4, a3, t4 +; RV32I-NEXT: srli t3, s11, 8 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb t3, 5(a2) +; RV32I-NEXT: sb a7, 6(a2) +; RV32I-NEXT: sb t6, 7(a2) +; RV32I-NEXT: and a1, a5, t4 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t4, 8(a2) +; RV32I-NEXT: sb a3, 8(a2) ; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb a6, 10(a2) +; RV32I-NEXT: sb t1, 10(a2) ; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, s4, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb t2, 12(a2) -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb t3, 14(a2) +; RV32I-NEXT: and a3, a6, t4 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a5, 12(a2) +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: sb t5, 14(a2) ; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a0, a7, t1 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s4, 16(a2) -; RV32I-NEXT: sb a4, 17(a2) +; RV32I-NEXT: and a1, t0, t4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a6, 16(a2) +; RV32I-NEXT: sb a3, 17(a2) ; 
RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a3, a1, t1 -; RV32I-NEXT: and a4, ra, t1 -; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb s6, 19(a2) +; RV32I-NEXT: and a3, a0, t4 +; RV32I-NEXT: and a4, t2, t4 +; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 20(a2) -; RV32I-NEXT: sb a0, 21(a2) -; RV32I-NEXT: sb t6, 22(a2) -; RV32I-NEXT: sb s6, 23(a2) -; RV32I-NEXT: sb a1, 24(a2) +; RV32I-NEXT: sb t0, 20(a2) +; RV32I-NEXT: sb a1, 21(a2) +; RV32I-NEXT: sb s0, 22(a2) +; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s5, 26(a2) +; RV32I-NEXT: sb s4, 26(a2) ; RV32I-NEXT: sb s7, 27(a2) -; RV32I-NEXT: sb ra, 28(a2) +; RV32I-NEXT: sb t2, 28(a2) ; RV32I-NEXT: sb a4, 29(a2) ; RV32I-NEXT: sb s8, 30(a2) ; RV32I-NEXT: sb s9, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload +; 
RV32I-NEXT: lw s7, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB13_114: -; RV32I-NEXT: mv t0, t1 -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: bltu ra, t6, .LBB13_109 -; RV32I-NEXT: .LBB13_115: +; RV32I-NEXT: .LBB13_190: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, t4, .LBB13_174 +; RV32I-NEXT: .LBB13_191: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s4, .LBB13_175 +; RV32I-NEXT: .LBB13_192: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s9, .LBB13_176 +; RV32I-NEXT: .LBB13_193: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB13_177 +; RV32I-NEXT: .LBB13_194: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s3, .LBB13_178 +; RV32I-NEXT: .LBB13_195: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB13_179 +; RV32I-NEXT: .LBB13_196: +; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: beqz t1, .LBB13_180 +; RV32I-NEXT: .LBB13_197: ; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: bgeu a4, a5, .LBB13_110 -; RV32I-NEXT: .LBB13_116: -; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t0, a3, a6 -; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a6, a3, s8 -; RV32I-NEXT: lw a3, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s1, a3, s5 -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s2, a3, s6 -; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a4, .LBB13_111 -; RV32I-NEXT: .LBB13_117: -; RV32I-NEXT: mv s0, t0 -; RV32I-NEXT: mv a0, a6 -; RV32I-NEXT: mv t4, s1 -; RV32I-NEXT: mv t2, s2 -; RV32I-NEXT: bgeu a4, a5, .LBB13_112 -; RV32I-NEXT: j .LBB13_113 +; 
RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s0, .LBB13_181 +; RV32I-NEXT: .LBB13_198: +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, t6, .LBB13_182 +; RV32I-NEXT: .LBB13_199: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, t4, .LBB13_183 +; RV32I-NEXT: .LBB13_200: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s4, .LBB13_184 +; RV32I-NEXT: .LBB13_201: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB13_185 +; RV32I-NEXT: .LBB13_202: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s2, .LBB13_186 +; RV32I-NEXT: .LBB13_203: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB13_187 +; RV32I-NEXT: .LBB13_204: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: bnez a7, .LBB13_188 +; RV32I-NEXT: j .LBB13_189 %src = load i256, ptr %src.ptr, align 1 %wordOff = load i256, ptr %wordOff.ptr, align 1 %bitOff = shl i256 %wordOff, 5 @@ -4834,610 +5286,836 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; ; RV32I-LABEL: lshr_32bytes_dwordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 16(a0) -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a5, 18(a0) -; RV32I-NEXT: lbu a6, 19(a0) -; RV32I-NEXT: lbu a7, 20(a0) -; 
RV32I-NEXT: lbu t0, 21(a0) -; RV32I-NEXT: lbu t1, 22(a0) -; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a6, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t4, t4, t3 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a5, 1(a0) +; RV32I-NEXT: lbu a6, 2(a0) +; RV32I-NEXT: lbu a7, 3(a0) +; RV32I-NEXT: lbu t0, 4(a0) +; RV32I-NEXT: lbu t1, 5(a0) +; RV32I-NEXT: lbu t2, 6(a0) +; RV32I-NEXT: lbu t3, 7(a0) +; RV32I-NEXT: lbu t4, 8(a0) +; RV32I-NEXT: lbu t5, 9(a0) +; RV32I-NEXT: lbu t6, 10(a0) +; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or a3, a5, a3 +; 
RV32I-NEXT: or a7, a7, a6 +; RV32I-NEXT: or t1, t1, t0 +; RV32I-NEXT: lbu a6, 13(a0) +; RV32I-NEXT: lbu a5, 14(a0) +; RV32I-NEXT: lbu s1, 15(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or t3, t3, t2 +; RV32I-NEXT: or t0, t5, t4 +; RV32I-NEXT: or t5, s0, t6 +; RV32I-NEXT: lbu t2, 1(a1) +; RV32I-NEXT: lbu t4, 0(a1) ; RV32I-NEXT: lbu t6, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: or s0, t2, t4 +; RV32I-NEXT: slli t2, s1, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 -; RV32I-NEXT: li t3, 32 -; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a4, 16 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli a4, t1, 16 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: or t1, t5, t4 -; RV32I-NEXT: or t5, a4, a7 -; RV32I-NEXT: or a4, t2, t0 -; RV32I-NEXT: slli a4, a4, 6 -; RV32I-NEXT: srl s0, t1, a4 -; RV32I-NEXT: neg s6, a4 -; RV32I-NEXT: sll t4, t5, s6 -; RV32I-NEXT: bltu a4, t3, .LBB14_2 +; RV32I-NEXT: or a1, a1, t6 +; RV32I-NEXT: slli t4, a7, 16 +; RV32I-NEXT: slli a7, t3, 16 +; RV32I-NEXT: slli t3, t5, 16 +; RV32I-NEXT: slli t5, a1, 16 +; RV32I-NEXT: or a1, a7, t1 +; RV32I-NEXT: or a7, t5, s0 +; RV32I-NEXT: slli a7, a7, 6 +; RV32I-NEXT: srli t1, a7, 5 +; RV32I-NEXT: andi t5, a7, 31 +; RV32I-NEXT: neg s3, t5 +; RV32I-NEXT: beqz t5, .LBB14_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a7, t5, a4 -; RV32I-NEXT: j .LBB14_3 +; RV32I-NEXT: sll a4, a1, s3 ; RV32I-NEXT: .LBB14_2: -; RV32I-NEXT: or a7, s0, t4 -; RV32I-NEXT: .LBB14_3: -; RV32I-NEXT: or t0, a6, a3 -; RV32I-NEXT: or a6, a1, a5 -; RV32I-NEXT: mv a1, t1 -; RV32I-NEXT: beqz a4, .LBB14_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a1, a7 +; RV32I-NEXT: or s7, t4, a3 +; RV32I-NEXT: lbu t4, 12(a0) +; RV32I-NEXT: lbu t6, 19(a0) +; RV32I-NEXT: slli s1, a6, 8 +; RV32I-NEXT: or a5, t2, a5 +; RV32I-NEXT: or a3, t3, t0 +; RV32I-NEXT: beqz 
t1, .LBB14_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: j .LBB14_5 +; RV32I-NEXT: .LBB14_4: +; RV32I-NEXT: srl s0, s7, a7 +; RV32I-NEXT: or s0, s0, a4 ; RV32I-NEXT: .LBB14_5: -; RV32I-NEXT: srl a3, t0, a4 -; RV32I-NEXT: sll a5, a6, s6 -; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t3, .LBB14_7 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu t0, 17(a0) +; RV32I-NEXT: lbu a4, 18(a0) +; RV32I-NEXT: slli s4, t6, 8 +; RV32I-NEXT: or s2, s1, t4 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: li s5, 1 +; RV32I-NEXT: sll t6, a3, s3 +; RV32I-NEXT: beqz t5, .LBB14_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li ra, 0 -; RV32I-NEXT: srl a3, a6, a4 -; RV32I-NEXT: j .LBB14_8 +; RV32I-NEXT: mv a6, t6 ; RV32I-NEXT: .LBB14_7: -; RV32I-NEXT: srl ra, t5, a4 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: .LBB14_8: -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: mv a7, t0 -; RV32I-NEXT: beqz a4, .LBB14_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a7, a3 -; RV32I-NEXT: .LBB14_10: -; RV32I-NEXT: sw a5, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s7, t6, a4 -; RV32I-NEXT: bltu a4, t3, .LBB14_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: j .LBB14_13 -; RV32I-NEXT: .LBB14_12: -; RV32I-NEXT: srl a5, a6, a4 +; RV32I-NEXT: lbu t2, 16(a0) +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: slli s1, t0, 8 +; RV32I-NEXT: or t4, s4, a4 +; RV32I-NEXT: srl a4, a1, a7 +; RV32I-NEXT: or a5, a5, s2 +; RV32I-NEXT: bne t1, s5, .LBB14_9 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: or s0, a4, a6 +; RV32I-NEXT: .LBB14_9: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu s5, 21(a0) +; RV32I-NEXT: lbu a6, 22(a0) +; RV32I-NEXT: slli s4, t3, 8 +; RV32I-NEXT: or t2, s1, t2 +; RV32I-NEXT: slli s6, t4, 16 +; RV32I-NEXT: li s8, 2 +; RV32I-NEXT: sll t3, a5, s3 +; RV32I-NEXT: beqz t5, .LBB14_11 +; RV32I-NEXT: # %bb.10: +; RV32I-NEXT: mv t0, t3 +; RV32I-NEXT: .LBB14_11: +; RV32I-NEXT: lbu s1, 20(a0) +; RV32I-NEXT: lbu s2, 27(a0) +; RV32I-NEXT: slli s5, s5, 8 +; RV32I-NEXT: or s4, s4, a6 
+; RV32I-NEXT: srl t4, a3, a7 +; RV32I-NEXT: or a6, s6, t2 +; RV32I-NEXT: bne t1, s8, .LBB14_13 +; RV32I-NEXT: # %bb.12: +; RV32I-NEXT: or s0, t4, t0 ; RV32I-NEXT: .LBB14_13: -; RV32I-NEXT: neg s10, s7 -; RV32I-NEXT: sw t4, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s7, t3, .LBB14_15 -; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: sw s7, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: li t2, 0 -; RV32I-NEXT: sll a3, t1, s7 -; RV32I-NEXT: j .LBB14_16 +; RV32I-NEXT: lbu s6, 25(a0) +; RV32I-NEXT: lbu t0, 26(a0) +; RV32I-NEXT: slli s8, s2, 8 +; RV32I-NEXT: or s7, s5, s1 +; RV32I-NEXT: slli s9, s4, 16 +; RV32I-NEXT: sll s11, a6, s3 +; RV32I-NEXT: beqz t5, .LBB14_15 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: mv t2, s11 ; RV32I-NEXT: .LBB14_15: -; RV32I-NEXT: sll t2, t1, s6 -; RV32I-NEXT: srl a3, t1, s10 -; RV32I-NEXT: or a3, a3, t4 -; RV32I-NEXT: .LBB14_16: -; RV32I-NEXT: addi s9, a4, -64 -; RV32I-NEXT: mv t4, t5 -; RV32I-NEXT: beqz s7, .LBB14_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv t4, a3 -; RV32I-NEXT: .LBB14_18: -; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t3, .LBB14_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: srl s2, t5, s9 -; RV32I-NEXT: j .LBB14_21 -; RV32I-NEXT: .LBB14_20: -; RV32I-NEXT: sll a3, t5, s11 -; RV32I-NEXT: or s2, s0, a3 +; RV32I-NEXT: lbu s1, 24(a0) +; RV32I-NEXT: lbu s2, 31(a0) +; RV32I-NEXT: slli s5, s6, 8 +; RV32I-NEXT: or s4, s8, t0 +; RV32I-NEXT: srl ra, a5, a7 +; RV32I-NEXT: or t0, s9, s7 +; RV32I-NEXT: li s6, 3 +; RV32I-NEXT: bne t1, s6, .LBB14_17 +; RV32I-NEXT: # %bb.16: +; RV32I-NEXT: or s0, ra, t2 +; RV32I-NEXT: .LBB14_17: +; RV32I-NEXT: li t2, 0 +; RV32I-NEXT: lbu s7, 29(a0) +; RV32I-NEXT: lbu s6, 30(a0) +; RV32I-NEXT: slli s8, s2, 8 +; RV32I-NEXT: or s2, s5, s1 +; RV32I-NEXT: slli s5, s4, 16 +; RV32I-NEXT: li s9, 4 +; RV32I-NEXT: sll s1, t0, s3 +; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t5, .LBB14_19 +; RV32I-NEXT: # %bb.18: +; RV32I-NEXT: lw t2, 8(sp) # 4-byte 
Folded Reload +; RV32I-NEXT: .LBB14_19: +; RV32I-NEXT: lbu s1, 28(a0) +; RV32I-NEXT: slli s7, s7, 8 +; RV32I-NEXT: or s4, s8, s6 +; RV32I-NEXT: srl s10, a6, a7 +; RV32I-NEXT: or a0, s5, s2 +; RV32I-NEXT: bne t1, s9, .LBB14_21 +; RV32I-NEXT: # %bb.20: +; RV32I-NEXT: or s0, s10, t2 ; RV32I-NEXT: .LBB14_21: -; RV32I-NEXT: lbu s1, 11(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: mv s0, t1 -; RV32I-NEXT: beqz s9, .LBB14_23 +; RV32I-NEXT: li s2, 0 +; RV32I-NEXT: or t2, s7, s1 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: li s9, 5 +; RV32I-NEXT: sll s7, a0, s3 +; RV32I-NEXT: beqz t5, .LBB14_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: mv s2, s7 ; RV32I-NEXT: .LBB14_23: -; RV32I-NEXT: lbu s4, 9(a0) -; RV32I-NEXT: lbu s2, 10(a0) -; RV32I-NEXT: lbu s5, 13(a0) -; RV32I-NEXT: lbu s8, 14(a0) -; RV32I-NEXT: slli s3, s1, 8 -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: sw ra, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t3, .LBB14_25 +; RV32I-NEXT: srl s8, t0, a7 +; RV32I-NEXT: or t2, s4, t2 +; RV32I-NEXT: bne t1, s9, .LBB14_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: j .LBB14_26 +; RV32I-NEXT: or s0, s8, s2 ; RV32I-NEXT: .LBB14_25: -; RV32I-NEXT: srl s1, t5, a4 -; RV32I-NEXT: .LBB14_26: -; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: lbu ra, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s4, s4, 8 -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s8, a3, s8 -; RV32I-NEXT: bgeu a4, t6, .LBB14_28 -; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: or s0, a7, t2 -; RV32I-NEXT: or s1, a5, t4 -; RV32I-NEXT: .LBB14_28: -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu t2, 7(a0) -; RV32I-NEXT: or a5, s4, ra -; RV32I-NEXT: slli t4, s2, 16 -; RV32I-NEXT: or s2, s5, s3 -; RV32I-NEXT: slli s3, s8, 16 -; RV32I-NEXT: mv s4, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a4, .LBB14_30 -; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv s4, s0 -; RV32I-NEXT: mv a7, s1 -; RV32I-NEXT: .LBB14_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; 
RV32I-NEXT: lbu a3, 2(a0) -; RV32I-NEXT: lbu s1, 5(a0) -; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s8, t2, 8 -; RV32I-NEXT: or t4, t4, a5 -; RV32I-NEXT: or t2, s3, s2 -; RV32I-NEXT: bltu a4, t6, .LBB14_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s2, 6 +; RV32I-NEXT: sll s5, t2, s3 +; RV32I-NEXT: beqz t5, .LBB14_27 +; RV32I-NEXT: # %bb.26: +; RV32I-NEXT: mv s4, s5 +; RV32I-NEXT: .LBB14_27: +; RV32I-NEXT: srl s6, a0, a7 +; RV32I-NEXT: bne t1, s2, .LBB14_29 +; RV32I-NEXT: # %bb.28: +; RV32I-NEXT: or s0, s6, s4 +; RV32I-NEXT: .LBB14_29: +; RV32I-NEXT: li s3, 7 +; RV32I-NEXT: srl s1, t2, a7 +; RV32I-NEXT: mv s4, s1 +; RV32I-NEXT: bne t1, s3, .LBB14_34 +; RV32I-NEXT: # %bb.30: +; RV32I-NEXT: bnez a7, .LBB14_35 +; RV32I-NEXT: .LBB14_31: +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: bnez t5, .LBB14_36 ; RV32I-NEXT: .LBB14_32: -; RV32I-NEXT: slli s3, ra, 8 -; RV32I-NEXT: or a5, s5, a3 -; RV32I-NEXT: lbu s5, 0(a0) -; RV32I-NEXT: lbu a0, 4(a0) -; RV32I-NEXT: slli s1, s1, 8 -; RV32I-NEXT: or a3, s8, s0 -; RV32I-NEXT: srl s2, t4, a4 -; RV32I-NEXT: sll ra, t2, s6 -; RV32I-NEXT: bltu a4, t3, .LBB14_34 -; RV32I-NEXT: # %bb.33: -; RV32I-NEXT: srl s0, t2, a4 -; RV32I-NEXT: j .LBB14_35 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: beqz t1, .LBB14_37 +; RV32I-NEXT: .LBB14_33: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB14_38 ; RV32I-NEXT: .LBB14_34: -; RV32I-NEXT: or s0, s2, ra +; RV32I-NEXT: mv s4, s0 +; RV32I-NEXT: beqz a7, .LBB14_31 ; RV32I-NEXT: .LBB14_35: -; RV32I-NEXT: or s3, s3, s5 -; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: or a0, s1, a0 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: mv s5, t4 -; RV32I-NEXT: beqz a4, .LBB14_37 -; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: mv s5, s0 +; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: beqz t5, .LBB14_32 +; RV32I-NEXT: .LBB14_36: +; RV32I-NEXT: mv s0, t6 +; RV32I-NEXT: li s4, 2 +; RV32I-NEXT: bnez t1, 
.LBB14_33 ; RV32I-NEXT: .LBB14_37: -; RV32I-NEXT: or s0, a5, s3 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: bltu a4, t3, .LBB14_39 -; RV32I-NEXT: # %bb.38: -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: srl a3, a0, a4 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: bnez a4, .LBB14_40 -; RV32I-NEXT: j .LBB14_41 -; RV32I-NEXT: .LBB14_39: -; RV32I-NEXT: srl s8, t2, a4 -; RV32I-NEXT: srl a3, s0, a4 -; RV32I-NEXT: sll a5, a0, s6 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: beqz a4, .LBB14_41 +; RV32I-NEXT: or a4, a4, s0 +; RV32I-NEXT: .LBB14_38: +; RV32I-NEXT: li s0, 1 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB14_57 +; RV32I-NEXT: # %bb.39: +; RV32I-NEXT: beq t1, s0, .LBB14_58 ; RV32I-NEXT: .LBB14_40: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB14_59 ; RV32I-NEXT: .LBB14_41: -; RV32I-NEXT: bltu a4, t3, .LBB14_44 -; RV32I-NEXT: # %bb.42: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: bgeu s7, t3, .LBB14_45 +; RV32I-NEXT: beq t1, s4, .LBB14_60 +; RV32I-NEXT: .LBB14_42: +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB14_61 ; RV32I-NEXT: .LBB14_43: -; RV32I-NEXT: sll s3, t4, s6 -; RV32I-NEXT: srl a3, t4, s10 -; RV32I-NEXT: or a3, a3, ra -; RV32I-NEXT: mv s10, t2 -; RV32I-NEXT: bnez s7, .LBB14_46 -; RV32I-NEXT: j .LBB14_47 +; RV32I-NEXT: li s4, 3 +; RV32I-NEXT: bne t1, s4, .LBB14_45 ; RV32I-NEXT: .LBB14_44: -; RV32I-NEXT: srl s1, a0, a4 -; RV32I-NEXT: bltu s7, t3, .LBB14_43 +; RV32I-NEXT: or a4, s10, t6 ; RV32I-NEXT: .LBB14_45: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t4, s7 -; RV32I-NEXT: mv s10, t2 -; RV32I-NEXT: beqz s7, .LBB14_47 -; RV32I-NEXT: .LBB14_46: -; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: li s4, 4 +; RV32I-NEXT: bnez t5, .LBB14_62 +; RV32I-NEXT: # %bb.46: +; RV32I-NEXT: beq t1, s4, .LBB14_63 ; RV32I-NEXT: .LBB14_47: -; RV32I-NEXT: bltu s9, t3, .LBB14_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: srl a3, t2, s9 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: bnez s9, .LBB14_50 -; RV32I-NEXT: j 
.LBB14_51 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: bnez t5, .LBB14_64 +; RV32I-NEXT: .LBB14_48: +; RV32I-NEXT: beq t1, s9, .LBB14_65 ; RV32I-NEXT: .LBB14_49: -; RV32I-NEXT: sll a3, t2, s11 -; RV32I-NEXT: or a3, s2, a3 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: beqz s9, .LBB14_51 +; RV32I-NEXT: mv t6, s1 +; RV32I-NEXT: bne t1, s2, .LBB14_66 ; RV32I-NEXT: .LBB14_50: -; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB14_67 ; RV32I-NEXT: .LBB14_51: -; RV32I-NEXT: bltu s9, t3, .LBB14_53 -; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bltu a4, t6, .LBB14_54 -; RV32I-NEXT: j .LBB14_55 +; RV32I-NEXT: beqz a7, .LBB14_53 +; RV32I-NEXT: .LBB14_52: +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: .LBB14_53: -; RV32I-NEXT: srl s7, t2, a4 -; RV32I-NEXT: bgeu a4, t6, .LBB14_55 -; RV32I-NEXT: .LBB14_54: -; RV32I-NEXT: or s2, a5, s3 -; RV32I-NEXT: or s7, s1, s10 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t6, 2 +; RV32I-NEXT: beqz t5, .LBB14_55 +; RV32I-NEXT: # %bb.54: +; RV32I-NEXT: mv a4, t3 ; RV32I-NEXT: .LBB14_55: -; RV32I-NEXT: li a3, 128 -; RV32I-NEXT: mv a5, s0 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: beqz a4, .LBB14_57 +; RV32I-NEXT: beqz t1, .LBB14_68 ; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv a5, s2 -; RV32I-NEXT: mv s1, s7 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB14_69 ; RV32I-NEXT: .LBB14_57: -; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s2, a3, a4 -; RV32I-NEXT: bltu a4, t6, .LBB14_59 -; RV32I-NEXT: # %bb.58: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s8, 0 +; RV32I-NEXT: mv t6, t3 +; RV32I-NEXT: bne t1, s0, .LBB14_40 +; RV32I-NEXT: .LBB14_58: +; RV32I-NEXT: or a4, t4, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB14_41 ; RV32I-NEXT: .LBB14_59: -; RV32I-NEXT: neg s3, s2 -; RV32I-NEXT: srl a5, t0, s3 -; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t3, .LBB14_61 -; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: li s10, 0 -; RV32I-NEXT: sll a3, 
t0, s2 -; RV32I-NEXT: j .LBB14_62 +; RV32I-NEXT: mv t6, s11 +; RV32I-NEXT: bne t1, s4, .LBB14_42 +; RV32I-NEXT: .LBB14_60: +; RV32I-NEXT: or a4, ra, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB14_43 ; RV32I-NEXT: .LBB14_61: -; RV32I-NEXT: sll s10, t0, s6 -; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: lw t6, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: li s4, 3 +; RV32I-NEXT: beq t1, s4, .LBB14_44 +; RV32I-NEXT: j .LBB14_45 ; RV32I-NEXT: .LBB14_62: -; RV32I-NEXT: sub s1, t6, s2 -; RV32I-NEXT: mv s8, a6 -; RV32I-NEXT: beqz s2, .LBB14_64 -; RV32I-NEXT: # %bb.63: -; RV32I-NEXT: mv s8, a3 +; RV32I-NEXT: mv t6, s7 +; RV32I-NEXT: bne t1, s4, .LBB14_47 +; RV32I-NEXT: .LBB14_63: +; RV32I-NEXT: or a4, s8, t6 +; RV32I-NEXT: li t6, 0 +; RV32I-NEXT: beqz t5, .LBB14_48 ; RV32I-NEXT: .LBB14_64: -; RV32I-NEXT: bltu s1, t3, .LBB14_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: srl a3, a6, s1 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: bnez s1, .LBB14_67 -; RV32I-NEXT: j .LBB14_68 +; RV32I-NEXT: mv t6, s5 +; RV32I-NEXT: bne t1, s9, .LBB14_49 +; RV32I-NEXT: .LBB14_65: +; RV32I-NEXT: or a4, s6, t6 +; RV32I-NEXT: mv t6, s1 +; RV32I-NEXT: beq t1, s2, .LBB14_50 ; RV32I-NEXT: .LBB14_66: -; RV32I-NEXT: neg a3, s1 -; RV32I-NEXT: sll a3, a6, a3 -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: beqz s1, .LBB14_68 +; RV32I-NEXT: mv t6, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB14_51 ; RV32I-NEXT: .LBB14_67: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a4, t6 +; RV32I-NEXT: bnez a7, .LBB14_52 +; RV32I-NEXT: j .LBB14_53 ; RV32I-NEXT: .LBB14_68: -; RV32I-NEXT: bltu s1, t3, .LBB14_71 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s2, t3, .LBB14_72 -; RV32I-NEXT: .LBB14_70: -; RV32I-NEXT: sll s6, t1, s6 -; RV32I-NEXT: srl a3, t1, s3 -; RV32I-NEXT: lw s3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB14_73 +; 
RV32I-NEXT: or a4, t4, a4 +; RV32I-NEXT: .LBB14_69: +; RV32I-NEXT: li t4, 3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_84 +; RV32I-NEXT: # %bb.70: +; RV32I-NEXT: beq t1, s0, .LBB14_85 ; RV32I-NEXT: .LBB14_71: -; RV32I-NEXT: srl s1, a6, s3 -; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t3, .LBB14_70 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_86 ; RV32I-NEXT: .LBB14_72: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: sll a3, t1, s2 +; RV32I-NEXT: beq t1, t6, .LBB14_87 ; RV32I-NEXT: .LBB14_73: -; RV32I-NEXT: addi s9, s2, -64 -; RV32I-NEXT: mv s5, t5 -; RV32I-NEXT: beqz s2, .LBB14_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_88 +; RV32I-NEXT: .LBB14_74: +; RV32I-NEXT: beq t1, t4, .LBB14_89 ; RV32I-NEXT: .LBB14_75: -; RV32I-NEXT: bltu s9, t3, .LBB14_77 -; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t0, s9 -; RV32I-NEXT: mv s7, a6 -; RV32I-NEXT: bnez s9, .LBB14_78 -; RV32I-NEXT: j .LBB14_79 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_90 +; RV32I-NEXT: .LBB14_76: +; RV32I-NEXT: beq t1, s4, .LBB14_91 ; RV32I-NEXT: .LBB14_77: -; RV32I-NEXT: sll s3, t0, s2 -; RV32I-NEXT: neg a3, s9 -; RV32I-NEXT: srl a3, t0, a3 -; RV32I-NEXT: sll s7, a6, s2 -; RV32I-NEXT: or a3, a3, s7 -; RV32I-NEXT: mv s7, a6 -; RV32I-NEXT: beqz s9, .LBB14_79 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s9, .LBB14_92 ; RV32I-NEXT: .LBB14_78: -; RV32I-NEXT: mv s7, a3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB14_93 ; RV32I-NEXT: .LBB14_79: -; RV32I-NEXT: bltu s2, t6, .LBB14_81 -; RV32I-NEXT: # %bb.80: -; RV32I-NEXT: sw zero, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: j .LBB14_82 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB14_94 +; RV32I-NEXT: .LBB14_80: +; RV32I-NEXT: bnez a7, .LBB14_95 ; RV32I-NEXT: .LBB14_81: -; RV32I-NEXT: sw s10, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s3, a5, s6 -; RV32I-NEXT: or s7, s1, s5 
+; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB14_96 ; RV32I-NEXT: .LBB14_82: -; RV32I-NEXT: addi ra, a4, -128 -; RV32I-NEXT: mv s5, t1 -; RV32I-NEXT: mv s6, t5 -; RV32I-NEXT: beqz s2, .LBB14_84 -; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: mv s5, s3 -; RV32I-NEXT: mv s6, s7 +; RV32I-NEXT: beqz t1, .LBB14_97 +; RV32I-NEXT: .LBB14_83: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: j .LBB14_98 ; RV32I-NEXT: .LBB14_84: -; RV32I-NEXT: neg s9, ra -; RV32I-NEXT: sll s3, t5, s9 -; RV32I-NEXT: bltu ra, t3, .LBB14_86 -; RV32I-NEXT: # %bb.85: -; RV32I-NEXT: srl a3, t5, ra -; RV32I-NEXT: mv s1, t1 -; RV32I-NEXT: bnez ra, .LBB14_87 -; RV32I-NEXT: j .LBB14_88 +; RV32I-NEXT: mv t3, s11 +; RV32I-NEXT: bne t1, s0, .LBB14_71 +; RV32I-NEXT: .LBB14_85: +; RV32I-NEXT: or a4, ra, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_72 ; RV32I-NEXT: .LBB14_86: -; RV32I-NEXT: lw a3, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: mv s1, t1 -; RV32I-NEXT: beqz ra, .LBB14_88 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, t6, .LBB14_73 ; RV32I-NEXT: .LBB14_87: -; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: or a4, s10, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_74 ; RV32I-NEXT: .LBB14_88: -; RV32I-NEXT: bltu ra, t3, .LBB14_90 -; RV32I-NEXT: # %bb.89: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: srl a3, a6, ra -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: bnez ra, .LBB14_91 -; RV32I-NEXT: j .LBB14_92 +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bne t1, t4, .LBB14_75 +; RV32I-NEXT: .LBB14_89: +; RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_76 ; RV32I-NEXT: .LBB14_90: -; RV32I-NEXT: srl s2, t5, a4 -; RV32I-NEXT: sll a3, a6, s9 -; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a5, a3 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: beqz ra, .LBB14_92 +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne t1, s4, .LBB14_77 ; RV32I-NEXT: .LBB14_91: -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv 
t3, s1 +; RV32I-NEXT: beq t1, s9, .LBB14_78 ; RV32I-NEXT: .LBB14_92: -; RV32I-NEXT: sub s10, t6, ra -; RV32I-NEXT: bltu ra, t3, .LBB14_95 -; RV32I-NEXT: # %bb.93: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bgeu s10, t3, .LBB14_96 +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB14_79 +; RV32I-NEXT: .LBB14_93: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s3, .LBB14_80 ; RV32I-NEXT: .LBB14_94: -; RV32I-NEXT: sll s9, t1, s9 -; RV32I-NEXT: neg a3, s10 -; RV32I-NEXT: srl a3, t1, a3 -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB14_97 +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB14_81 ; RV32I-NEXT: .LBB14_95: -; RV32I-NEXT: srl s7, a6, a4 -; RV32I-NEXT: bltu s10, t3, .LBB14_94 +; RV32I-NEXT: mv a3, t3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB14_82 ; RV32I-NEXT: .LBB14_96: -; RV32I-NEXT: li s9, 0 -; RV32I-NEXT: sll a3, t1, s10 +; RV32I-NEXT: mv a4, s11 +; RV32I-NEXT: bnez t1, .LBB14_83 ; RV32I-NEXT: .LBB14_97: -; RV32I-NEXT: addi s11, ra, -64 -; RV32I-NEXT: mv s3, t5 -; RV32I-NEXT: beqz s10, .LBB14_99 -; RV32I-NEXT: # %bb.98: -; RV32I-NEXT: mv s3, a3 -; RV32I-NEXT: .LBB14_99: -; RV32I-NEXT: bltu s11, t3, .LBB14_101 -; RV32I-NEXT: # %bb.100: -; RV32I-NEXT: srl a3, t5, s11 -; RV32I-NEXT: bnez s11, .LBB14_102 -; RV32I-NEXT: j .LBB14_103 +; RV32I-NEXT: or a4, ra, a4 +; RV32I-NEXT: .LBB14_98: +; RV32I-NEXT: lw ra, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_112 +; RV32I-NEXT: # %bb.99: +; RV32I-NEXT: beq t1, s0, .LBB14_113 +; RV32I-NEXT: .LBB14_100: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_114 ; RV32I-NEXT: .LBB14_101: -; RV32I-NEXT: srl a3, t1, ra -; RV32I-NEXT: neg s10, s11 -; RV32I-NEXT: sll s10, t5, s10 -; RV32I-NEXT: or a3, a3, s10 -; RV32I-NEXT: beqz s11, .LBB14_103 +; RV32I-NEXT: beq t1, t6, .LBB14_115 ; RV32I-NEXT: .LBB14_102: -; RV32I-NEXT: mv t1, a3 -; RV32I-NEXT: .LBB14_103: -; RV32I-NEXT: bltu s11, t3, .LBB14_105 -; RV32I-NEXT: # 
%bb.104: ; RV32I-NEXT: li t3, 0 -; RV32I-NEXT: bltu ra, t6, .LBB14_106 -; RV32I-NEXT: j .LBB14_107 +; RV32I-NEXT: bnez t5, .LBB14_116 +; RV32I-NEXT: .LBB14_103: +; RV32I-NEXT: beq t1, t4, .LBB14_117 +; RV32I-NEXT: .LBB14_104: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s4, .LBB14_118 ; RV32I-NEXT: .LBB14_105: -; RV32I-NEXT: srl t3, t5, ra -; RV32I-NEXT: bgeu ra, t6, .LBB14_107 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB14_119 ; RV32I-NEXT: .LBB14_106: -; RV32I-NEXT: or t1, a5, s9 -; RV32I-NEXT: or t3, s7, s3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB14_120 ; RV32I-NEXT: .LBB14_107: -; RV32I-NEXT: li a5, 128 -; RV32I-NEXT: bnez ra, .LBB14_114 -; RV32I-NEXT: # %bb.108: -; RV32I-NEXT: bgeu ra, t6, .LBB14_115 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB14_121 +; RV32I-NEXT: .LBB14_108: +; RV32I-NEXT: bnez a7, .LBB14_122 ; RV32I-NEXT: .LBB14_109: -; RV32I-NEXT: bltu a4, a5, .LBB14_116 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB14_123 ; RV32I-NEXT: .LBB14_110: -; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bnez a4, .LBB14_117 +; RV32I-NEXT: beqz t1, .LBB14_124 ; RV32I-NEXT: .LBB14_111: -; RV32I-NEXT: bltu a4, a5, .LBB14_113 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_125 +; RV32I-NEXT: j .LBB14_126 ; RV32I-NEXT: .LBB14_112: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: li ra, 0 +; RV32I-NEXT: lw t3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s0, .LBB14_100 ; RV32I-NEXT: .LBB14_113: -; RV32I-NEXT: srli a4, s0, 16 -; RV32I-NEXT: lui t1, 16 -; RV32I-NEXT: srli t0, s0, 24 -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli t5, a0, 24 -; RV32I-NEXT: srli a6, t4, 16 -; RV32I-NEXT: srli s2, t4, 24 -; RV32I-NEXT: srli t3, t2, 16 -; RV32I-NEXT: srli s3, t2, 24 -; RV32I-NEXT: srli s1, s4, 16 -; RV32I-NEXT: srli a3, s4, 24 -; RV32I-NEXT: srli t6, a7, 16 -; RV32I-NEXT: srli s6, a7, 24 -; RV32I-NEXT: srli s5, a1, 16 -; RV32I-NEXT: 
srli s7, a1, 24 -; RV32I-NEXT: srli s8, ra, 16 -; RV32I-NEXT: srli s9, ra, 24 -; RV32I-NEXT: addi t1, t1, -1 -; RV32I-NEXT: and s10, s0, t1 -; RV32I-NEXT: and s11, a0, t1 +; RV32I-NEXT: or a4, s10, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_101 +; RV32I-NEXT: .LBB14_114: +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: bne t1, t6, .LBB14_102 +; RV32I-NEXT: .LBB14_115: +; RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_103 +; RV32I-NEXT: .LBB14_116: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne t1, t4, .LBB14_104 +; RV32I-NEXT: .LBB14_117: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, s4, .LBB14_105 +; RV32I-NEXT: .LBB14_118: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB14_106 +; RV32I-NEXT: .LBB14_119: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s2, .LBB14_107 +; RV32I-NEXT: .LBB14_120: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB14_108 +; RV32I-NEXT: .LBB14_121: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a7, .LBB14_109 +; RV32I-NEXT: .LBB14_122: +; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB14_110 +; RV32I-NEXT: .LBB14_123: +; RV32I-NEXT: lw a4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t1, .LBB14_111 +; RV32I-NEXT: .LBB14_124: +; RV32I-NEXT: or a4, s10, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_126 +; RV32I-NEXT: .LBB14_125: +; RV32I-NEXT: mv t3, s7 +; RV32I-NEXT: .LBB14_126: +; RV32I-NEXT: beq t1, s0, .LBB14_138 +; RV32I-NEXT: # %bb.127: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_139 +; RV32I-NEXT: .LBB14_128: +; RV32I-NEXT: beq t1, t6, .LBB14_140 +; RV32I-NEXT: .LBB14_129: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, t4, .LBB14_141 +; RV32I-NEXT: .LBB14_130: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s4, .LBB14_142 +; RV32I-NEXT: .LBB14_131: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s9, .LBB14_143 +; RV32I-NEXT: 
.LBB14_132: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB14_144 +; RV32I-NEXT: .LBB14_133: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB14_145 +; RV32I-NEXT: .LBB14_134: +; RV32I-NEXT: bnez a7, .LBB14_146 +; RV32I-NEXT: .LBB14_135: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB14_147 +; RV32I-NEXT: .LBB14_136: +; RV32I-NEXT: beqz t1, .LBB14_148 +; RV32I-NEXT: .LBB14_137: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bnez t5, .LBB14_149 +; RV32I-NEXT: j .LBB14_150 +; RV32I-NEXT: .LBB14_138: +; RV32I-NEXT: or a4, s8, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_128 +; RV32I-NEXT: .LBB14_139: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: bne t1, t6, .LBB14_129 +; RV32I-NEXT: .LBB14_140: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, t4, .LBB14_130 +; RV32I-NEXT: .LBB14_141: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s4, .LBB14_131 +; RV32I-NEXT: .LBB14_142: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s9, .LBB14_132 +; RV32I-NEXT: .LBB14_143: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB14_133 +; RV32I-NEXT: .LBB14_144: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s3, .LBB14_134 +; RV32I-NEXT: .LBB14_145: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB14_135 +; RV32I-NEXT: .LBB14_146: +; RV32I-NEXT: mv a6, t3 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB14_136 +; RV32I-NEXT: .LBB14_147: +; RV32I-NEXT: mv a4, s7 +; RV32I-NEXT: bnez t1, .LBB14_137 +; RV32I-NEXT: .LBB14_148: +; RV32I-NEXT: or a4, s8, a4 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beqz t5, .LBB14_150 +; RV32I-NEXT: .LBB14_149: +; RV32I-NEXT: mv t3, s5 +; RV32I-NEXT: .LBB14_150: +; RV32I-NEXT: beq t1, s0, .LBB14_161 +; RV32I-NEXT: # %bb.151: +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, t6, .LBB14_162 +; RV32I-NEXT: .LBB14_152: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t4, .LBB14_163 +; 
RV32I-NEXT: .LBB14_153: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s4, .LBB14_164 +; RV32I-NEXT: .LBB14_154: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB14_165 +; RV32I-NEXT: .LBB14_155: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB14_166 +; RV32I-NEXT: .LBB14_156: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB14_167 +; RV32I-NEXT: .LBB14_157: +; RV32I-NEXT: bnez a7, .LBB14_168 +; RV32I-NEXT: .LBB14_158: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bnez t5, .LBB14_169 +; RV32I-NEXT: .LBB14_159: +; RV32I-NEXT: beqz t1, .LBB14_170 +; RV32I-NEXT: .LBB14_160: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: bne t1, s0, .LBB14_171 +; RV32I-NEXT: j .LBB14_172 +; RV32I-NEXT: .LBB14_161: +; RV32I-NEXT: or a4, s6, t3 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, t6, .LBB14_152 +; RV32I-NEXT: .LBB14_162: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, t4, .LBB14_153 +; RV32I-NEXT: .LBB14_163: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s4, .LBB14_154 +; RV32I-NEXT: .LBB14_164: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB14_155 +; RV32I-NEXT: .LBB14_165: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s2, .LBB14_156 +; RV32I-NEXT: .LBB14_166: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, .LBB14_157 +; RV32I-NEXT: .LBB14_167: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: beqz a7, .LBB14_158 +; RV32I-NEXT: .LBB14_168: +; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beqz t5, .LBB14_159 +; RV32I-NEXT: .LBB14_169: +; RV32I-NEXT: mv a4, s5 +; RV32I-NEXT: bnez t1, .LBB14_160 +; RV32I-NEXT: .LBB14_170: +; RV32I-NEXT: or a4, s6, a4 +; RV32I-NEXT: mv t3, s1 +; RV32I-NEXT: beq t1, s0, .LBB14_172 +; RV32I-NEXT: .LBB14_171: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: .LBB14_172: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t6, .LBB14_190 +; RV32I-NEXT: # %bb.173: +; RV32I-NEXT: li t3, 0 +; 
RV32I-NEXT: bne t1, t4, .LBB14_191 +; RV32I-NEXT: .LBB14_174: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s4, .LBB14_192 +; RV32I-NEXT: .LBB14_175: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s9, .LBB14_193 +; RV32I-NEXT: .LBB14_176: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s2, .LBB14_194 +; RV32I-NEXT: .LBB14_177: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s3, .LBB14_195 +; RV32I-NEXT: .LBB14_178: +; RV32I-NEXT: bnez a7, .LBB14_196 +; RV32I-NEXT: .LBB14_179: +; RV32I-NEXT: bnez t1, .LBB14_197 +; RV32I-NEXT: .LBB14_180: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s0, .LBB14_198 +; RV32I-NEXT: .LBB14_181: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, t6, .LBB14_199 +; RV32I-NEXT: .LBB14_182: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, t4, .LBB14_200 +; RV32I-NEXT: .LBB14_183: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s4, .LBB14_201 +; RV32I-NEXT: .LBB14_184: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s9, .LBB14_202 +; RV32I-NEXT: .LBB14_185: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bne t1, s2, .LBB14_203 +; RV32I-NEXT: .LBB14_186: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: bne t1, s3, .LBB14_204 +; RV32I-NEXT: .LBB14_187: +; RV32I-NEXT: beqz a7, .LBB14_189 +; RV32I-NEXT: .LBB14_188: +; RV32I-NEXT: mv t2, a4 +; RV32I-NEXT: .LBB14_189: +; RV32I-NEXT: srli a4, ra, 16 +; RV32I-NEXT: lui t4, 16 +; RV32I-NEXT: srli t3, ra, 24 +; RV32I-NEXT: srli a7, a1, 16 +; RV32I-NEXT: srli t6, a1, 24 +; RV32I-NEXT: srli t1, a3, 16 +; RV32I-NEXT: srli s2, a3, 24 +; RV32I-NEXT: srli t5, a5, 16 +; RV32I-NEXT: srli s3, a5, 24 +; RV32I-NEXT: srli s1, a6, 16 +; RV32I-NEXT: srli s6, a6, 24 +; RV32I-NEXT: srli s0, t0, 16 +; RV32I-NEXT: srli s5, t0, 24 +; RV32I-NEXT: srli s4, a0, 16 +; RV32I-NEXT: srli s7, a0, 24 +; RV32I-NEXT: srli s8, t2, 16 +; RV32I-NEXT: srli s9, t2, 24 +; RV32I-NEXT: addi t4, t4, -1 +; RV32I-NEXT: and s10, ra, t4 +; RV32I-NEXT: and s11, a1, t4 ; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb s0, 0(a2) +; RV32I-NEXT: sb ra, 0(a2) ; RV32I-NEXT: sb 
s10, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb t0, 3(a2) -; RV32I-NEXT: and a4, t4, t1 -; RV32I-NEXT: srli t0, s11, 8 -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: sb t0, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t5, 7(a2) -; RV32I-NEXT: and a0, t2, t1 +; RV32I-NEXT: sb t3, 3(a2) +; RV32I-NEXT: and a4, a3, t4 +; RV32I-NEXT: srli t3, s11, 8 +; RV32I-NEXT: sb a1, 4(a2) +; RV32I-NEXT: sb t3, 5(a2) +; RV32I-NEXT: sb a7, 6(a2) +; RV32I-NEXT: sb t6, 7(a2) +; RV32I-NEXT: and a1, a5, t4 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t4, 8(a2) +; RV32I-NEXT: sb a3, 8(a2) ; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb a6, 10(a2) +; RV32I-NEXT: sb t1, 10(a2) ; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, s4, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb t2, 12(a2) -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb t3, 14(a2) +; RV32I-NEXT: and a3, a6, t4 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb a5, 12(a2) +; RV32I-NEXT: sb a1, 13(a2) +; RV32I-NEXT: sb t5, 14(a2) ; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a0, a7, t1 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s4, 16(a2) -; RV32I-NEXT: sb a4, 17(a2) +; RV32I-NEXT: and a1, t0, t4 +; RV32I-NEXT: srli a3, a3, 8 +; RV32I-NEXT: sb a6, 16(a2) +; RV32I-NEXT: sb a3, 17(a2) ; RV32I-NEXT: sb s1, 18(a2) -; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a3, a1, t1 -; RV32I-NEXT: and a4, ra, t1 -; RV32I-NEXT: srli a0, a0, 8 +; RV32I-NEXT: sb s6, 19(a2) +; RV32I-NEXT: and a3, a0, t4 +; RV32I-NEXT: and a4, t2, t4 +; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 20(a2) -; RV32I-NEXT: sb a0, 21(a2) -; RV32I-NEXT: sb t6, 22(a2) -; RV32I-NEXT: sb s6, 23(a2) -; RV32I-NEXT: sb a1, 24(a2) +; RV32I-NEXT: sb t0, 20(a2) +; RV32I-NEXT: sb a1, 21(a2) +; RV32I-NEXT: sb s0, 22(a2) +; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a3, 25(a2) -; RV32I-NEXT: sb s5, 26(a2) +; RV32I-NEXT: sb s4, 26(a2) ; RV32I-NEXT: sb s7, 27(a2) -; RV32I-NEXT: 
sb ra, 28(a2) +; RV32I-NEXT: sb t2, 28(a2) ; RV32I-NEXT: sb a4, 29(a2) ; RV32I-NEXT: sb s8, 30(a2) ; RV32I-NEXT: sb s9, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 64 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB14_114: -; RV32I-NEXT: mv t0, t1 -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: bltu ra, t6, .LBB14_109 -; RV32I-NEXT: .LBB14_115: +; RV32I-NEXT: .LBB14_190: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, t4, .LBB14_174 +; RV32I-NEXT: .LBB14_191: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s4, .LBB14_175 +; RV32I-NEXT: .LBB14_192: +; RV32I-NEXT: mv a4, t3 +; 
RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s9, .LBB14_176 +; RV32I-NEXT: .LBB14_193: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s2, .LBB14_177 +; RV32I-NEXT: .LBB14_194: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s3, .LBB14_178 +; RV32I-NEXT: .LBB14_195: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: beqz a7, .LBB14_179 +; RV32I-NEXT: .LBB14_196: +; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: beqz t1, .LBB14_180 +; RV32I-NEXT: .LBB14_197: ; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: bgeu a4, a5, .LBB14_110 -; RV32I-NEXT: .LBB14_116: -; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t0, a3, a6 -; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a6, a3, s8 -; RV32I-NEXT: lw a3, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s1, a3, s5 -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s2, a3, s6 -; RV32I-NEXT: lw ra, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a4, .LBB14_111 -; RV32I-NEXT: .LBB14_117: -; RV32I-NEXT: mv s0, t0 -; RV32I-NEXT: mv a0, a6 -; RV32I-NEXT: mv t4, s1 -; RV32I-NEXT: mv t2, s2 -; RV32I-NEXT: bgeu a4, a5, .LBB14_112 -; RV32I-NEXT: j .LBB14_113 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s0, .LBB14_181 +; RV32I-NEXT: .LBB14_198: +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, t6, .LBB14_182 +; RV32I-NEXT: .LBB14_199: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, t4, .LBB14_183 +; RV32I-NEXT: .LBB14_200: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s4, .LBB14_184 +; RV32I-NEXT: .LBB14_201: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s9, .LBB14_185 +; RV32I-NEXT: .LBB14_202: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: beq t1, s2, .LBB14_186 +; RV32I-NEXT: .LBB14_203: +; RV32I-NEXT: mv t3, a4 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: beq t1, s3, 
.LBB14_187 +; RV32I-NEXT: .LBB14_204: +; RV32I-NEXT: mv a4, t3 +; RV32I-NEXT: bnez a7, .LBB14_188 +; RV32I-NEXT: j .LBB14_189 %src = load i256, ptr %src.ptr, align 1 %dwordOff = load i256, ptr %dwordOff.ptr, align 1 %bitOff = shl i256 %dwordOff, 6 @@ -5753,605 +6431,812 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: shl_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu t0, 4(a0) -; 
RV32I-NEXT: lbu t1, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a3 -; RV32I-NEXT: lbu a6, 11(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or a5, a7, a5 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or t0, t3, t2 -; RV32I-NEXT: lbu t1, 1(a1) -; RV32I-NEXT: lbu t2, 0(a1) -; RV32I-NEXT: lbu t3, 2(a1) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: li s9, 64 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a7, a7, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t3 -; RV32I-NEXT: li t4, 32 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or t3, a5, a4 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, a1, t1 -; RV32I-NEXT: slli a4, a4, 3 -; RV32I-NEXT: neg s10, a4 -; RV32I-NEXT: srl t5, t3, s10 -; RV32I-NEXT: sll s5, a5, a4 -; RV32I-NEXT: bltu a4, t4, .LBB15_2 +; RV32I-NEXT: or a6, a5, a3 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a1, a1, 3 +; RV32I-NEXT: srli a5, a1, 5 +; RV32I-NEXT: sll t5, a6, a1 +; RV32I-NEXT: li s7, 1 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: beqz a5, .LBB15_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: sll a7, t3, a4 -; RV32I-NEXT: j .LBB15_3 +; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: sll s8, t3, a4 -; RV32I-NEXT: or a7, t5, s5 -; RV32I-NEXT: .LBB15_3: -; RV32I-NEXT: lbu t2, 9(a0) -; RV32I-NEXT: lbu a1, 10(a0) -; RV32I-NEXT: lbu t1, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t6, a3, 8 -; RV32I-NEXT: sub s6, s9, a4 -; 
RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a4, .LBB15_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: .LBB15_5: -; RV32I-NEXT: slli a7, t2, 8 -; RV32I-NEXT: or a6, a6, a1 -; RV32I-NEXT: lbu t2, 8(a0) -; RV32I-NEXT: lbu a1, 12(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t6, t0 -; RV32I-NEXT: neg t6, s6 -; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s6, t4, .LBB15_7 -; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srl t6, a5, s6 -; RV32I-NEXT: j .LBB15_8 -; RV32I-NEXT: .LBB15_7: -; RV32I-NEXT: sll t6, a5, t6 -; RV32I-NEXT: or t6, t5, t6 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li s8, 2 +; RV32I-NEXT: beq a5, s7, .LBB15_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a4, a3 +; RV32I-NEXT: .LBB15_4: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li s0, 3 +; RV32I-NEXT: beq a5, s8, .LBB15_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a7, a4 +; RV32I-NEXT: .LBB15_6: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li s4, 4 +; RV32I-NEXT: beq a5, s0, .LBB15_8 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: mv a4, a7 ; RV32I-NEXT: .LBB15_8: -; RV32I-NEXT: or a7, a7, t2 -; RV32I-NEXT: slli t2, a6, 16 -; RV32I-NEXT: or a1, t1, a1 -; RV32I-NEXT: slli t0, t0, 16 -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: beqz s6, .LBB15_10 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: li s5, 5 +; RV32I-NEXT: beq a5, s4, .LBB15_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a6, t6 +; RV32I-NEXT: mv t0, a4 ; RV32I-NEXT: .LBB15_10: -; RV32I-NEXT: or t1, t2, a7 -; RV32I-NEXT: or t2, t0, a1 -; RV32I-NEXT: bltu s6, t4, .LBB15_12 -; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: j .LBB15_13 +; RV32I-NEXT: beq a5, s5, .LBB15_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB15_12: -; RV32I-NEXT: srl a7, a5, s10 -; RV32I-NEXT: .LBB15_13: -; RV32I-NEXT: srl s0, t1, s10 -; RV32I-NEXT: sll a1, t2, a4 -; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t4, .LBB15_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: li s1, 0 -; 
RV32I-NEXT: sll a1, t1, a4 -; RV32I-NEXT: j .LBB15_16 -; RV32I-NEXT: .LBB15_15: -; RV32I-NEXT: sll s1, t1, a4 -; RV32I-NEXT: or a1, s0, a1 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: lbu t0, 5(a0) +; RV32I-NEXT: lbu t1, 6(a0) +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a5, s1, .LBB15_14 +; RV32I-NEXT: # %bb.13: +; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: .LBB15_14: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: li ra, 7 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: beq a5, ra, .LBB15_16 +; RV32I-NEXT: # %bb.15: +; RV32I-NEXT: mv a7, a4 ; RV32I-NEXT: .LBB15_16: -; RV32I-NEXT: addi s7, a4, -64 -; RV32I-NEXT: mv s3, t2 -; RV32I-NEXT: beqz a4, .LBB15_18 +; RV32I-NEXT: or a3, t0, a3 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: andi t6, a1, 31 +; RV32I-NEXT: mv a4, a6 +; RV32I-NEXT: beqz a1, .LBB15_18 ; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a4, a7 ; RV32I-NEXT: .LBB15_18: -; RV32I-NEXT: neg a1, s7 -; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s7, t4, .LBB15_20 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a7, t1, a3 +; RV32I-NEXT: neg s3, t6 +; RV32I-NEXT: srl s11, a6, s3 +; RV32I-NEXT: beqz t6, .LBB15_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: sll a1, t3, s7 -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: bnez s7, .LBB15_21 -; RV32I-NEXT: j .LBB15_22 +; RV32I-NEXT: mv t0, s11 ; RV32I-NEXT: .LBB15_20: -; RV32I-NEXT: sll s2, t3, a4 -; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: or a1, a1, s5 -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: beqz s7, .LBB15_22 -; RV32I-NEXT: .LBB15_21: -; RV32I-NEXT: mv s4, a1 -; RV32I-NEXT: .LBB15_22: -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bltu a4, s9, .LBB15_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sll s10, a7, a1 +; RV32I-NEXT: beqz a5, .LBB15_22 +; RV32I-NEXT: # %bb.21: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: j .LBB15_25 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: 
bne a5, s7, .LBB15_23 +; RV32I-NEXT: j .LBB15_24 +; RV32I-NEXT: .LBB15_22: +; RV32I-NEXT: or a3, s10, t0 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beq a5, s7, .LBB15_24 +; RV32I-NEXT: .LBB15_23: +; RV32I-NEXT: mv a6, a3 ; RV32I-NEXT: .LBB15_24: -; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, a6, s1 -; RV32I-NEXT: or s4, a7, s3 -; RV32I-NEXT: .LBB15_25: -; RV32I-NEXT: sub ra, a1, a4 -; RV32I-NEXT: mv a7, t1 -; RV32I-NEXT: mv a6, t2 -; RV32I-NEXT: beqz a4, .LBB15_27 -; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: mv a7, s2 -; RV32I-NEXT: mv a6, s4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s8, .LBB15_40 +; RV32I-NEXT: # %bb.25: +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bne a5, s0, .LBB15_41 +; RV32I-NEXT: .LBB15_26: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s4, .LBB15_28 ; RV32I-NEXT: .LBB15_27: -; RV32I-NEXT: neg s1, ra -; RV32I-NEXT: sll s2, t2, s1 -; RV32I-NEXT: bltu ra, t4, .LBB15_29 -; RV32I-NEXT: # %bb.28: -; RV32I-NEXT: srl a1, t2, ra -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bnez ra, .LBB15_30 -; RV32I-NEXT: j .LBB15_31 -; RV32I-NEXT: .LBB15_29: -; RV32I-NEXT: or a1, s0, s2 -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz ra, .LBB15_31 +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: .LBB15_28: +; RV32I-NEXT: lbu t2, 11(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s5, .LBB15_30 +; RV32I-NEXT: # %bb.29: +; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB15_30: -; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB15_31: -; RV32I-NEXT: bltu ra, t4, .LBB15_33 -; RV32I-NEXT: # %bb.32: -; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: srl a1, a5, ra -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: bnez ra, .LBB15_34 -; RV32I-NEXT: j .LBB15_35 -; RV32I-NEXT: .LBB15_33: -; RV32I-NEXT: srl a1, t2, s10 -; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sll a1, a5, s1 -; RV32I-NEXT: or a1, t5, a1 -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: beqz ra, .LBB15_35 +; RV32I-NEXT: li 
a6, 0 +; RV32I-NEXT: lbu t1, 9(a0) +; RV32I-NEXT: lbu a3, 10(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a5, s1, .LBB15_32 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: mv a6, t0 +; RV32I-NEXT: .LBB15_32: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t3, 8(a0) +; RV32I-NEXT: or t2, t2, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB15_34 +; RV32I-NEXT: # %bb.33: +; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: .LBB15_34: -; RV32I-NEXT: mv t5, a1 -; RV32I-NEXT: .LBB15_35: -; RV32I-NEXT: sub s3, s9, ra -; RV32I-NEXT: bltu ra, t4, .LBB15_38 -; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s3, t4, .LBB15_39 -; RV32I-NEXT: .LBB15_37: -; RV32I-NEXT: sll s1, t1, s1 -; RV32I-NEXT: neg a1, s3 -; RV32I-NEXT: srl a1, t1, a1 -; RV32I-NEXT: or a1, a1, s2 -; RV32I-NEXT: j .LBB15_40 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli a6, t2, 16 +; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: beqz a1, .LBB15_36 +; RV32I-NEXT: # %bb.35: +; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: .LBB15_36: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a6, a6, a3 +; RV32I-NEXT: srl s2, a7, s3 +; RV32I-NEXT: beqz t6, .LBB15_38 +; RV32I-NEXT: # %bb.37: +; RV32I-NEXT: mv t0, s2 ; RV32I-NEXT: .LBB15_38: -; RV32I-NEXT: srl a1, a5, s10 -; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s3, t4, .LBB15_37 -; RV32I-NEXT: .LBB15_39: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sll a1, t1, s3 +; RV32I-NEXT: sll s9, a6, a1 +; RV32I-NEXT: beqz a5, .LBB15_42 +; RV32I-NEXT: # %bb.39: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_43 +; RV32I-NEXT: j .LBB15_44 ; RV32I-NEXT: .LBB15_40: -; RV32I-NEXT: addi s4, ra, -64 -; RV32I-NEXT: mv s2, t2 -; RV32I-NEXT: beqz s3, .LBB15_42 -; RV32I-NEXT: # %bb.41: -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beq a5, s0, .LBB15_26 +; RV32I-NEXT: .LBB15_41: +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s4, .LBB15_27 +; RV32I-NEXT: 
j .LBB15_28 ; RV32I-NEXT: .LBB15_42: -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s5, a7 -; RV32I-NEXT: bltu s4, t4, .LBB15_44 -; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: srl t0, t2, s4 -; RV32I-NEXT: j .LBB15_45 +; RV32I-NEXT: or a7, s9, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_44 +; RV32I-NEXT: .LBB15_43: +; RV32I-NEXT: mv a3, s11 ; RV32I-NEXT: .LBB15_44: -; RV32I-NEXT: srl a1, t1, ra -; RV32I-NEXT: neg t0, s4 -; RV32I-NEXT: sll t0, t2, t0 -; RV32I-NEXT: or t0, a1, t0 -; RV32I-NEXT: .LBB15_45: -; RV32I-NEXT: mv s0, s10 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: lbu s8, 19(a0) -; RV32I-NEXT: lbu a1, 23(a0) -; RV32I-NEXT: mv s3, t1 -; RV32I-NEXT: beqz s4, .LBB15_47 -; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: mv s3, t0 +; RV32I-NEXT: beq a5, s7, .LBB15_61 +; RV32I-NEXT: # %bb.45: +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne a5, s8, .LBB15_62 +; RV32I-NEXT: .LBB15_46: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: bne a5, s0, .LBB15_63 ; RV32I-NEXT: .LBB15_47: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: lbu s10, 17(a0) -; RV32I-NEXT: lbu t0, 18(a0) -; RV32I-NEXT: lbu s9, 21(a0) -; RV32I-NEXT: lbu t6, 22(a0) -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: bltu s4, t4, .LBB15_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: j .LBB15_50 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s4, .LBB15_49 +; RV32I-NEXT: .LBB15_48: +; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB15_49: -; RV32I-NEXT: srl s4, t2, ra -; RV32I-NEXT: .LBB15_50: -; RV32I-NEXT: or s11, s8, t0 -; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu s8, 20(a0) -; RV32I-NEXT: slli s10, s10, 8 -; RV32I-NEXT: slli s9, s9, 8 -; RV32I-NEXT: or t6, a1, t6 -; RV32I-NEXT: bgeu ra, a3, .LBB15_52 -; RV32I-NEXT: # %bb.51: -; RV32I-NEXT: or s3, t5, s1 -; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s4, a1, s2 -; RV32I-NEXT: .LBB15_52: -; RV32I-NEXT: or a1, s10, t0 -; RV32I-NEXT: slli s11, s11, 16 -; RV32I-NEXT: or t0, 
s9, s8 -; RV32I-NEXT: slli t6, t6, 16 -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: mv s1, a5 -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: beqz ra, .LBB15_54 -; RV32I-NEXT: # %bb.53: -; RV32I-NEXT: mv t5, s3 -; RV32I-NEXT: mv s1, s4 -; RV32I-NEXT: .LBB15_54: -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, s11, a1 -; RV32I-NEXT: or s1, t6, t0 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: mv a6, a7 -; RV32I-NEXT: mv a7, s0 -; RV32I-NEXT: bltu ra, a1, .LBB15_56 -; RV32I-NEXT: # %bb.55: -; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB15_56: -; RV32I-NEXT: srl s3, s2, a7 -; RV32I-NEXT: sll ra, s1, a4 -; RV32I-NEXT: mv a7, s5 -; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t4, .LBB15_58 -; RV32I-NEXT: # %bb.57: -; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sll a1, s2, a4 -; RV32I-NEXT: j .LBB15_59 -; RV32I-NEXT: .LBB15_58: -; RV32I-NEXT: sll a1, s2, a4 -; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: or a1, s3, ra +; RV32I-NEXT: lbu t3, 15(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s5, .LBB15_51 +; RV32I-NEXT: # %bb.50: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB15_51: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu t1, 13(a0) +; RV32I-NEXT: lbu a3, 14(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: beq a5, s1, .LBB15_53 +; RV32I-NEXT: # %bb.52: +; RV32I-NEXT: mv a7, t0 +; RV32I-NEXT: .LBB15_53: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 12(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB15_55 +; RV32I-NEXT: # %bb.54: +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: .LBB15_55: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv a7, a6 +; RV32I-NEXT: beqz a1, .LBB15_57 +; RV32I-NEXT: # %bb.56: +; RV32I-NEXT: mv a7, t0 +; RV32I-NEXT: .LBB15_57: +; RV32I-NEXT: sw a7, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a7, t3, a3 +; 
RV32I-NEXT: srl a3, a6, s3 +; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t6, .LBB15_59 +; RV32I-NEXT: # %bb.58: +; RV32I-NEXT: lw t0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB15_59: -; RV32I-NEXT: lbu s9, 27(a0) -; RV32I-NEXT: lbu t6, 31(a0) -; RV32I-NEXT: mv t5, s1 -; RV32I-NEXT: beqz a4, .LBB15_61 +; RV32I-NEXT: sll a3, a7, a1 +; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a5, .LBB15_64 ; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: mv t5, a1 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_65 +; RV32I-NEXT: j .LBB15_66 ; RV32I-NEXT: .LBB15_61: -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s4, 26(a0) -; RV32I-NEXT: lbu s11, 29(a0) -; RV32I-NEXT: lbu s10, 30(a0) -; RV32I-NEXT: slli s9, s9, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: bltu s6, t4, .LBB15_63 -; RV32I-NEXT: # %bb.62: -; RV32I-NEXT: srl t0, s1, s6 -; RV32I-NEXT: j .LBB15_64 +; RV32I-NEXT: or a7, s10, a3 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: beq a5, s8, .LBB15_46 +; RV32I-NEXT: .LBB15_62: +; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: beq a5, s0, .LBB15_47 ; RV32I-NEXT: .LBB15_63: -; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, s1, a1 -; RV32I-NEXT: or t0, s3, a1 +; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s4, .LBB15_48 +; RV32I-NEXT: j .LBB15_49 ; RV32I-NEXT: .LBB15_64: -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: lbu s3, 24(a0) -; RV32I-NEXT: lbu a1, 28(a0) -; RV32I-NEXT: or s4, s9, s4 -; RV32I-NEXT: slli s11, s11, 8 -; RV32I-NEXT: or t6, t6, s10 -; RV32I-NEXT: mv s9, s2 -; RV32I-NEXT: beqz s6, .LBB15_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: mv s9, t0 +; RV32I-NEXT: or a6, a3, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_66 +; RV32I-NEXT: .LBB15_65: +; RV32I-NEXT: mv a3, s2 ; RV32I-NEXT: .LBB15_66: -; RV32I-NEXT: or a0, s8, s3 -; RV32I-NEXT: slli t0, s4, 16 -; RV32I-NEXT: or a1, s11, a1 -; RV32I-NEXT: slli t6, t6, 16 -; RV32I-NEXT: 
bltu s6, t4, .LBB15_68 +; RV32I-NEXT: beq a5, s7, .LBB15_84 ; RV32I-NEXT: # %bb.67: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: j .LBB15_69 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_85 ; RV32I-NEXT: .LBB15_68: -; RV32I-NEXT: srl s4, s1, s0 +; RV32I-NEXT: beq a5, s8, .LBB15_86 ; RV32I-NEXT: .LBB15_69: -; RV32I-NEXT: li s11, 64 -; RV32I-NEXT: or s6, t0, a0 -; RV32I-NEXT: or a0, t6, a1 -; RV32I-NEXT: bltu a4, t4, .LBB15_71 -; RV32I-NEXT: # %bb.70: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a1, s6, a4 -; RV32I-NEXT: mv s10, a0 -; RV32I-NEXT: bnez a4, .LBB15_72 -; RV32I-NEXT: j .LBB15_73 +; RV32I-NEXT: mv t0, t5 +; RV32I-NEXT: bne a5, s0, .LBB15_87 +; RV32I-NEXT: .LBB15_70: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s4, .LBB15_72 ; RV32I-NEXT: .LBB15_71: -; RV32I-NEXT: sll s3, s6, a4 -; RV32I-NEXT: srl a1, s6, s0 -; RV32I-NEXT: sll t0, a0, a4 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: mv s10, a0 -; RV32I-NEXT: beqz a4, .LBB15_73 +; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB15_72: -; RV32I-NEXT: mv s10, a1 -; RV32I-NEXT: .LBB15_73: -; RV32I-NEXT: bltu s7, t4, .LBB15_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: sll a1, s2, s7 -; RV32I-NEXT: mv s0, s1 -; RV32I-NEXT: bnez s7, .LBB15_76 -; RV32I-NEXT: j .LBB15_77 -; RV32I-NEXT: .LBB15_75: -; RV32I-NEXT: sll s5, s2, a4 -; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a1, s2, a1 -; RV32I-NEXT: or a1, a1, ra -; RV32I-NEXT: mv s0, s1 -; RV32I-NEXT: beqz s7, .LBB15_77 +; RV32I-NEXT: lbu t3, 19(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s5, .LBB15_74 +; RV32I-NEXT: # %bb.73: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB15_74: +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: lbu a3, 18(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: beq a5, s1, .LBB15_76 +; RV32I-NEXT: # %bb.75: +; RV32I-NEXT: mv a6, t0 ; RV32I-NEXT: .LBB15_76: -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: .LBB15_77: -; RV32I-NEXT: bltu a4, s11, .LBB15_79 -; RV32I-NEXT: # %bb.78: -; 
RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: li t5, 0 -; RV32I-NEXT: j .LBB15_80 -; RV32I-NEXT: .LBB15_79: -; RV32I-NEXT: or s5, s9, s3 -; RV32I-NEXT: or s0, s4, s10 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 16(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB15_78 +; RV32I-NEXT: # %bb.77: +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: .LBB15_78: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv s6, a7 +; RV32I-NEXT: beqz a1, .LBB15_80 +; RV32I-NEXT: # %bb.79: +; RV32I-NEXT: mv s6, t0 ; RV32I-NEXT: .LBB15_80: -; RV32I-NEXT: addi s9, a4, -128 -; RV32I-NEXT: mv s7, s6 -; RV32I-NEXT: mv s8, a0 -; RV32I-NEXT: beqz a4, .LBB15_82 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a6, t3, a3 +; RV32I-NEXT: srl a3, a7, s3 +; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t6, .LBB15_82 ; RV32I-NEXT: # %bb.81: -; RV32I-NEXT: mv s7, s5 -; RV32I-NEXT: mv s8, s0 +; RV32I-NEXT: lw t0, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB15_82: -; RV32I-NEXT: neg s3, s9 -; RV32I-NEXT: srl s0, t3, s3 -; RV32I-NEXT: bltu s9, t4, .LBB15_84 +; RV32I-NEXT: sll a3, a6, a1 +; RV32I-NEXT: sw a3, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a5, .LBB15_88 ; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: sll a1, t3, s9 -; RV32I-NEXT: j .LBB15_85 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_89 +; RV32I-NEXT: j .LBB15_90 ; RV32I-NEXT: .LBB15_84: -; RV32I-NEXT: sll s5, t3, a4 -; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, s0, a1 +; RV32I-NEXT: or a6, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_68 ; RV32I-NEXT: .LBB15_85: -; RV32I-NEXT: sub s4, s11, s9 -; RV32I-NEXT: mv t6, a5 -; RV32I-NEXT: beqz s9, .LBB15_87 -; RV32I-NEXT: # %bb.86: -; RV32I-NEXT: mv t6, a1 +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne a5, s8, .LBB15_69 +; RV32I-NEXT: .LBB15_86: +; RV32I-NEXT: or a6, s10, a3 +; RV32I-NEXT: mv t0, t5 +; 
RV32I-NEXT: beq a5, s0, .LBB15_70 ; RV32I-NEXT: .LBB15_87: -; RV32I-NEXT: bltu s4, t4, .LBB15_89 -; RV32I-NEXT: # %bb.88: -; RV32I-NEXT: srl a1, a5, s4 -; RV32I-NEXT: mv s0, t3 -; RV32I-NEXT: bnez s4, .LBB15_90 -; RV32I-NEXT: j .LBB15_91 +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s4, .LBB15_71 +; RV32I-NEXT: j .LBB15_72 +; RV32I-NEXT: .LBB15_88: +; RV32I-NEXT: or a7, a3, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_90 ; RV32I-NEXT: .LBB15_89: -; RV32I-NEXT: neg a1, s4 -; RV32I-NEXT: sll a1, a5, a1 -; RV32I-NEXT: or a1, s0, a1 -; RV32I-NEXT: mv s0, t3 -; RV32I-NEXT: beqz s4, .LBB15_91 +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB15_90: -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: .LBB15_91: -; RV32I-NEXT: bltu s4, t4, .LBB15_94 -; RV32I-NEXT: # %bb.92: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li ra, 64 -; RV32I-NEXT: bgeu s9, t4, .LBB15_95 +; RV32I-NEXT: beq a5, s7, .LBB15_109 +; RV32I-NEXT: # %bb.91: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_110 +; RV32I-NEXT: .LBB15_92: +; RV32I-NEXT: beq a5, s8, .LBB15_111 ; RV32I-NEXT: .LBB15_93: -; RV32I-NEXT: sll s10, t1, a4 -; RV32I-NEXT: srl a1, t1, s3 -; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: j .LBB15_96 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_112 ; RV32I-NEXT: .LBB15_94: -; RV32I-NEXT: srl s4, a5, s3 -; RV32I-NEXT: li ra, 64 -; RV32I-NEXT: bltu s9, t4, .LBB15_93 +; RV32I-NEXT: beq a5, s0, .LBB15_113 ; RV32I-NEXT: .LBB15_95: -; RV32I-NEXT: li s10, 0 -; RV32I-NEXT: sll a1, t1, s9 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: beq a5, s4, .LBB15_97 ; RV32I-NEXT: .LBB15_96: -; RV32I-NEXT: addi s11, s9, -64 -; RV32I-NEXT: mv s3, t2 -; RV32I-NEXT: beqz s9, .LBB15_98 -; RV32I-NEXT: # %bb.97: -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: .LBB15_98: -; RV32I-NEXT: bltu s11, t4, .LBB15_100 -; RV32I-NEXT: # %bb.99: -; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: sll a1, t3, s11 -; RV32I-NEXT: bnez s11, .LBB15_101 -; 
RV32I-NEXT: j .LBB15_102 -; RV32I-NEXT: .LBB15_100: -; RV32I-NEXT: sll t4, t3, s9 -; RV32I-NEXT: neg a1, s11 -; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: sll t0, a5, s9 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: beqz s11, .LBB15_102 +; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: .LBB15_97: +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s5, .LBB15_99 +; RV32I-NEXT: # %bb.98: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB15_99: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu t1, 21(a0) +; RV32I-NEXT: lbu a3, 22(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: beq a5, s1, .LBB15_101 +; RV32I-NEXT: # %bb.100: +; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB15_101: -; RV32I-NEXT: mv a5, a1 -; RV32I-NEXT: .LBB15_102: -; RV32I-NEXT: bltu s9, ra, .LBB15_104 -; RV32I-NEXT: # %bb.103: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bnez s9, .LBB15_105 -; RV32I-NEXT: j .LBB15_106 -; RV32I-NEXT: .LBB15_104: -; RV32I-NEXT: or t4, s0, s10 -; RV32I-NEXT: or a5, s4, s3 -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: beqz s9, .LBB15_106 +; RV32I-NEXT: sw s9, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 20(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB15_103 +; RV32I-NEXT: # %bb.102: +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: .LBB15_103: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv s9, a6 +; RV32I-NEXT: beqz a1, .LBB15_105 +; RV32I-NEXT: # %bb.104: +; RV32I-NEXT: mv s9, t0 ; RV32I-NEXT: .LBB15_105: -; RV32I-NEXT: mv t1, t4 -; RV32I-NEXT: mv t2, a5 -; RV32I-NEXT: .LBB15_106: -; RV32I-NEXT: bltu a4, a1, .LBB15_108 -; RV32I-NEXT: # %bb.107: -; RV32I-NEXT: li ra, 0 +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: or t0, t3, a3 +; RV32I-NEXT: srl a6, a6, s3 +; RV32I-NEXT: beqz t6, .LBB15_107 +; RV32I-NEXT: # %bb.106: +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: .LBB15_107: +; RV32I-NEXT: sll a3, t0, a1 +; RV32I-NEXT: sw a3, 0(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: beqz a5, .LBB15_114 +; RV32I-NEXT: # %bb.108: +; RV32I-NEXT: li t1, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: bnez a4, .LBB15_109 -; RV32I-NEXT: j .LBB15_110 -; RV32I-NEXT: .LBB15_108: -; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s5, a1, a5 -; RV32I-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t6, a1, t5 -; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t1, a1, s7 -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t2, a1, s8 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a4, .LBB15_110 +; RV32I-NEXT: bnez t6, .LBB15_115 +; RV32I-NEXT: j .LBB15_116 ; RV32I-NEXT: .LBB15_109: -; RV32I-NEXT: mv s2, s5 -; RV32I-NEXT: mv s1, t6 -; RV32I-NEXT: mv s6, t1 -; RV32I-NEXT: mv a0, t2 +; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a7, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_92 ; RV32I-NEXT: .LBB15_110: -; RV32I-NEXT: srli a4, ra, 16 -; RV32I-NEXT: lui t2, 16 -; RV32I-NEXT: srli t1, ra, 24 -; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: srli t4, a3, 24 -; RV32I-NEXT: srli t0, a7, 16 -; RV32I-NEXT: srli s0, a7, 24 -; RV32I-NEXT: srli t3, a6, 16 -; RV32I-NEXT: srli s3, a6, 24 -; RV32I-NEXT: srli t6, s2, 16 -; RV32I-NEXT: srli a1, s2, 24 -; RV32I-NEXT: srli t5, s1, 16 -; RV32I-NEXT: srli s5, s1, 24 -; RV32I-NEXT: srli s4, s6, 16 -; RV32I-NEXT: srli s7, s6, 24 -; RV32I-NEXT: srli s8, a0, 16 -; RV32I-NEXT: srli s9, a0, 24 -; RV32I-NEXT: addi t2, t2, -1 -; RV32I-NEXT: and s10, ra, t2 -; RV32I-NEXT: and s11, a3, t2 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: bne a5, s8, .LBB15_93 +; RV32I-NEXT: .LBB15_111: +; RV32I-NEXT: or a7, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_94 +; RV32I-NEXT: .LBB15_112: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne a5, s0, .LBB15_95 +; RV32I-NEXT: .LBB15_113: +; RV32I-NEXT: or a7, 
s10, a3 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne a5, s4, .LBB15_96 +; RV32I-NEXT: j .LBB15_97 +; RV32I-NEXT: .LBB15_114: +; RV32I-NEXT: or t1, a3, t1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_116 +; RV32I-NEXT: .LBB15_115: +; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: .LBB15_116: +; RV32I-NEXT: beq a5, s7, .LBB15_136 +; RV32I-NEXT: # %bb.117: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_137 +; RV32I-NEXT: .LBB15_118: +; RV32I-NEXT: beq a5, s8, .LBB15_138 +; RV32I-NEXT: .LBB15_119: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_139 +; RV32I-NEXT: .LBB15_120: +; RV32I-NEXT: beq a5, s0, .LBB15_140 +; RV32I-NEXT: .LBB15_121: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_141 +; RV32I-NEXT: .LBB15_122: +; RV32I-NEXT: bne a5, s4, .LBB15_124 +; RV32I-NEXT: .LBB15_123: +; RV32I-NEXT: or t1, s10, a3 +; RV32I-NEXT: .LBB15_124: +; RV32I-NEXT: lbu s0, 27(a0) +; RV32I-NEXT: mv t3, t5 +; RV32I-NEXT: beq a5, s5, .LBB15_126 +; RV32I-NEXT: # %bb.125: +; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: .LBB15_126: +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: lbu t4, 25(a0) +; RV32I-NEXT: lbu a3, 26(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: beq a5, s1, .LBB15_128 +; RV32I-NEXT: # %bb.127: +; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: .LBB15_128: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s1, 24(a0) +; RV32I-NEXT: or s0, s0, a3 +; RV32I-NEXT: slli a3, t4, 8 +; RV32I-NEXT: beq a5, ra, .LBB15_130 +; RV32I-NEXT: # %bb.129: +; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: .LBB15_130: +; RV32I-NEXT: or a3, a3, s1 +; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: mv ra, t0 +; RV32I-NEXT: beqz a1, .LBB15_132 +; RV32I-NEXT: # %bb.131: +; RV32I-NEXT: mv ra, t3 +; RV32I-NEXT: .LBB15_132: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: or t3, s0, a3 +; RV32I-NEXT: srl t0, t0, s3 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: beqz t6, .LBB15_134 +; RV32I-NEXT: # %bb.133: +; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: .LBB15_134: +; RV32I-NEXT: sll t1, t3, a1 +; RV32I-NEXT: li s0, 3 +; 
RV32I-NEXT: beqz a5, .LBB15_142 +; RV32I-NEXT: # %bb.135: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_143 +; RV32I-NEXT: j .LBB15_144 +; RV32I-NEXT: .LBB15_136: +; RV32I-NEXT: lw a7, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t1, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_118 +; RV32I-NEXT: .LBB15_137: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s8, .LBB15_119 +; RV32I-NEXT: .LBB15_138: +; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t1, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_120 +; RV32I-NEXT: .LBB15_139: +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: bne a5, s0, .LBB15_121 +; RV32I-NEXT: .LBB15_140: +; RV32I-NEXT: lw a7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t1, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_122 +; RV32I-NEXT: .LBB15_141: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: beq a5, s4, .LBB15_123 +; RV32I-NEXT: j .LBB15_124 +; RV32I-NEXT: .LBB15_142: +; RV32I-NEXT: or t4, t1, t4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_144 +; RV32I-NEXT: .LBB15_143: +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: .LBB15_144: +; RV32I-NEXT: beq a5, s7, .LBB15_166 +; RV32I-NEXT: # %bb.145: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_167 +; RV32I-NEXT: .LBB15_146: +; RV32I-NEXT: beq a5, s8, .LBB15_168 +; RV32I-NEXT: .LBB15_147: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_169 +; RV32I-NEXT: .LBB15_148: +; RV32I-NEXT: beq a5, s0, .LBB15_170 +; RV32I-NEXT: .LBB15_149: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_171 +; RV32I-NEXT: .LBB15_150: +; RV32I-NEXT: bne a5, s4, .LBB15_152 +; RV32I-NEXT: .LBB15_151: +; RV32I-NEXT: lw a7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: .LBB15_152: +; RV32I-NEXT: li a7, 1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_154 +; RV32I-NEXT: # %bb.153: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: .LBB15_154: +; RV32I-NEXT: li s7, 2 
+; RV32I-NEXT: li s8, 3 +; RV32I-NEXT: bne a5, s5, .LBB15_156 +; RV32I-NEXT: # %bb.155: +; RV32I-NEXT: or t4, s10, a3 +; RV32I-NEXT: .LBB15_156: +; RV32I-NEXT: lbu s0, 31(a0) +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: beq a5, s1, .LBB15_158 +; RV32I-NEXT: # %bb.157: +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: .LBB15_158: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: lbu s5, 29(a0) +; RV32I-NEXT: lbu s1, 30(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: li s4, 7 +; RV32I-NEXT: beq a5, s4, .LBB15_160 +; RV32I-NEXT: # %bb.159: +; RV32I-NEXT: mv t4, a3 +; RV32I-NEXT: .LBB15_160: +; RV32I-NEXT: lbu a3, 28(a0) +; RV32I-NEXT: slli s5, s5, 8 +; RV32I-NEXT: or s0, s0, s1 +; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: beqz a1, .LBB15_162 +; RV32I-NEXT: # %bb.161: +; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: .LBB15_162: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: or a3, s5, a3 +; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: li s4, 4 +; RV32I-NEXT: beqz t6, .LBB15_164 +; RV32I-NEXT: # %bb.163: +; RV32I-NEXT: srl t4, t3, s3 +; RV32I-NEXT: .LBB15_164: +; RV32I-NEXT: or s3, s0, a3 +; RV32I-NEXT: li s0, 5 +; RV32I-NEXT: beqz a5, .LBB15_172 +; RV32I-NEXT: # %bb.165: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_173 +; RV32I-NEXT: j .LBB15_174 +; RV32I-NEXT: .LBB15_166: +; RV32I-NEXT: lw a7, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_146 +; RV32I-NEXT: .LBB15_167: +; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s8, .LBB15_147 +; RV32I-NEXT: .LBB15_168: +; RV32I-NEXT: lw a7, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_148 +; RV32I-NEXT: .LBB15_169: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s0, .LBB15_149 +; RV32I-NEXT: .LBB15_170: +; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, 
.LBB15_150 +; RV32I-NEXT: .LBB15_171: +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: beq a5, s4, .LBB15_151 +; RV32I-NEXT: j .LBB15_152 +; RV32I-NEXT: .LBB15_172: +; RV32I-NEXT: sll a3, s3, a1 +; RV32I-NEXT: or t3, a3, t4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_174 +; RV32I-NEXT: .LBB15_173: +; RV32I-NEXT: mv a3, t0 +; RV32I-NEXT: .LBB15_174: +; RV32I-NEXT: beq a5, a7, .LBB15_189 +; RV32I-NEXT: # %bb.175: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_190 +; RV32I-NEXT: .LBB15_176: +; RV32I-NEXT: beq a5, s7, .LBB15_191 +; RV32I-NEXT: .LBB15_177: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_192 +; RV32I-NEXT: .LBB15_178: +; RV32I-NEXT: beq a5, s8, .LBB15_193 +; RV32I-NEXT: .LBB15_179: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_194 +; RV32I-NEXT: .LBB15_180: +; RV32I-NEXT: beq a5, s4, .LBB15_195 +; RV32I-NEXT: .LBB15_181: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_196 +; RV32I-NEXT: .LBB15_182: +; RV32I-NEXT: beq a5, s0, .LBB15_197 +; RV32I-NEXT: .LBB15_183: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB15_198 +; RV32I-NEXT: .LBB15_184: +; RV32I-NEXT: beq a5, s1, .LBB15_199 +; RV32I-NEXT: .LBB15_185: +; RV32I-NEXT: li a3, 7 +; RV32I-NEXT: bne a5, a3, .LBB15_200 +; RV32I-NEXT: .LBB15_186: +; RV32I-NEXT: beqz a1, .LBB15_188 +; RV32I-NEXT: .LBB15_187: +; RV32I-NEXT: mv s3, t5 +; RV32I-NEXT: .LBB15_188: +; RV32I-NEXT: srli a1, a4, 16 +; RV32I-NEXT: lui a7, 16 +; RV32I-NEXT: srli a6, a4, 24 +; RV32I-NEXT: srli a3, t2, 16 +; RV32I-NEXT: srli t1, t2, 24 +; RV32I-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: srli a5, s2, 16 +; RV32I-NEXT: srli t5, s2, 24 +; RV32I-NEXT: srli t0, s6, 16 +; RV32I-NEXT: srli t6, s6, 24 +; RV32I-NEXT: srli t4, s9, 16 +; RV32I-NEXT: srli s4, s9, 24 +; RV32I-NEXT: srli t3, ra, 16 +; RV32I-NEXT: srli s1, ra, 24 +; RV32I-NEXT: srli s0, a0, 16 +; RV32I-NEXT: srli s5, a0, 24 +; RV32I-NEXT: srli s7, s3, 16 +; RV32I-NEXT: srli s8, s3, 24 +; RV32I-NEXT: addi a7, a7, -1 +; RV32I-NEXT: and 
s10, a4, a7 +; RV32I-NEXT: and s11, t2, a7 ; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb ra, 0(a2) +; RV32I-NEXT: sb a4, 0(a2) ; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb t1, 3(a2) -; RV32I-NEXT: and a4, a7, t2 -; RV32I-NEXT: srli t1, s11, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb t1, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t4, 7(a2) -; RV32I-NEXT: and a3, a6, t2 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 8(a2) -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t0, 10(a2) -; RV32I-NEXT: sb s0, 11(a2) -; RV32I-NEXT: and a4, s2, t2 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a6, 3(a2) +; RV32I-NEXT: and a1, s2, a7 +; RV32I-NEXT: srli a4, s11, 8 +; RV32I-NEXT: sb t2, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) +; RV32I-NEXT: sb t1, 7(a2) +; RV32I-NEXT: and a3, s6, a7 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb s2, 8(a2) +; RV32I-NEXT: sb a1, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb t5, 11(a2) +; RV32I-NEXT: and a1, s9, a7 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb s6, 12(a2) ; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb t3, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a3, s1, t2 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s2, 16(a2) -; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: sb t6, 18(a2) -; RV32I-NEXT: sb a1, 19(a2) -; RV32I-NEXT: and a1, s6, t2 -; RV32I-NEXT: and a4, a0, t2 +; RV32I-NEXT: sb t0, 14(a2) +; RV32I-NEXT: sb t6, 15(a2) +; RV32I-NEXT: and a3, ra, a7 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb s9, 16(a2) +; RV32I-NEXT: sb a1, 17(a2) +; RV32I-NEXT: sb t4, 18(a2) +; RV32I-NEXT: sb s4, 19(a2) +; RV32I-NEXT: and a1, a0, a7 +; RV32I-NEXT: and a4, s3, a7 ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s1, 20(a2) +; RV32I-NEXT: sb ra, 20(a2) ; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t5, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) -; RV32I-NEXT: sb s6, 24(a2) +; 
RV32I-NEXT: sb t3, 22(a2) +; RV32I-NEXT: sb s1, 23(a2) +; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s7, 27(a2) -; RV32I-NEXT: sb a0, 28(a2) +; RV32I-NEXT: sb s0, 26(a2) +; RV32I-NEXT: sb s5, 27(a2) +; RV32I-NEXT: sb s3, 28(a2) ; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s8, 30(a2) -; RV32I-NEXT: sb s9, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: sb s7, 30(a2) +; RV32I-NEXT: sb s8, 31(a2) +; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret +; RV32I-NEXT: .LBB15_189: +; RV32I-NEXT: or t3, t1, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_176 +; RV32I-NEXT: 
.LBB15_190: +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: bne a5, s7, .LBB15_177 +; RV32I-NEXT: .LBB15_191: +; RV32I-NEXT: lw a6, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_178 +; RV32I-NEXT: .LBB15_192: +; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s8, .LBB15_179 +; RV32I-NEXT: .LBB15_193: +; RV32I-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_180 +; RV32I-NEXT: .LBB15_194: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s4, .LBB15_181 +; RV32I-NEXT: .LBB15_195: +; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_182 +; RV32I-NEXT: .LBB15_196: +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: bne a5, s0, .LBB15_183 +; RV32I-NEXT: .LBB15_197: +; RV32I-NEXT: lw a6, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB15_184 +; RV32I-NEXT: .LBB15_198: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne a5, s1, .LBB15_185 +; RV32I-NEXT: .LBB15_199: +; RV32I-NEXT: or t3, s10, a3 +; RV32I-NEXT: li a3, 7 +; RV32I-NEXT: beq a5, a3, .LBB15_186 +; RV32I-NEXT: .LBB15_200: +; RV32I-NEXT: mv t5, t3 +; RV32I-NEXT: bnez a1, .LBB15_187 +; RV32I-NEXT: j .LBB15_188 %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -6667,605 +7552,816 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; ; RV32I-LABEL: shl_32bytes_wordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte 
Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu t0, 4(a0) -; RV32I-NEXT: lbu t1, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a3 -; RV32I-NEXT: lbu a6, 11(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or a5, a7, a5 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or t0, t3, t2 -; RV32I-NEXT: lbu t1, 1(a1) -; RV32I-NEXT: lbu t2, 0(a1) -; RV32I-NEXT: lbu t3, 2(a1) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, 
t2 -; RV32I-NEXT: li s9, 64 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a7, a7, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t3 -; RV32I-NEXT: li t4, 32 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or t3, a5, a4 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, a1, t1 -; RV32I-NEXT: slli a4, a4, 5 -; RV32I-NEXT: neg s10, a4 -; RV32I-NEXT: srl t5, t3, s10 -; RV32I-NEXT: sll s5, a5, a4 -; RV32I-NEXT: bltu a4, t4, .LBB16_2 +; RV32I-NEXT: or a6, a5, a3 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a1, a1, 5 +; RV32I-NEXT: srli a5, a1, 5 +; RV32I-NEXT: sll t5, a6, a1 +; RV32I-NEXT: li s9, 1 +; RV32I-NEXT: mv a4, t5 +; RV32I-NEXT: beqz a5, .LBB16_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: sll a7, t3, a4 -; RV32I-NEXT: j .LBB16_3 +; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: .LBB16_2: -; RV32I-NEXT: sll s8, t3, a4 -; RV32I-NEXT: or a7, t5, s5 -; RV32I-NEXT: .LBB16_3: -; RV32I-NEXT: lbu t2, 9(a0) -; RV32I-NEXT: lbu a1, 10(a0) -; RV32I-NEXT: lbu t1, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t6, a3, 8 -; RV32I-NEXT: sub s6, s9, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a4, .LBB16_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: .LBB16_5: -; RV32I-NEXT: slli a7, t2, 8 -; RV32I-NEXT: or a6, a6, a1 -; RV32I-NEXT: lbu t2, 8(a0) -; RV32I-NEXT: lbu a1, 12(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t6, t0 -; RV32I-NEXT: neg t6, s6 -; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s6, t4, .LBB16_7 -; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srl t6, a5, s6 -; RV32I-NEXT: j .LBB16_8 -; RV32I-NEXT: .LBB16_7: -; RV32I-NEXT: sll t6, a5, t6 -; RV32I-NEXT: or t6, t5, t6 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li s0, 2 +; RV32I-NEXT: beq a5, s9, .LBB16_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: 
.LBB16_4: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: beq a5, s0, .LBB16_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: .LBB16_6: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li s6, 3 +; RV32I-NEXT: li s4, 4 +; RV32I-NEXT: beq a5, s6, .LBB16_8 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: mv a4, a7 ; RV32I-NEXT: .LBB16_8: -; RV32I-NEXT: or a7, a7, t2 -; RV32I-NEXT: slli t2, a6, 16 -; RV32I-NEXT: or a1, t1, a1 -; RV32I-NEXT: slli t0, t0, 16 -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: beqz s6, .LBB16_10 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: li s11, 5 +; RV32I-NEXT: beq a5, s4, .LBB16_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a6, t6 +; RV32I-NEXT: mv t0, a4 ; RV32I-NEXT: .LBB16_10: -; RV32I-NEXT: or t1, t2, a7 -; RV32I-NEXT: or t2, t0, a1 -; RV32I-NEXT: bltu s6, t4, .LBB16_12 -; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: j .LBB16_13 +; RV32I-NEXT: beq a5, s11, .LBB16_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB16_12: -; RV32I-NEXT: srl a7, a5, s10 -; RV32I-NEXT: .LBB16_13: -; RV32I-NEXT: srl s0, t1, s10 -; RV32I-NEXT: sll a1, t2, a4 -; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t4, .LBB16_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sll a1, t1, a4 -; RV32I-NEXT: j .LBB16_16 -; RV32I-NEXT: .LBB16_15: -; RV32I-NEXT: sll s1, t1, a4 -; RV32I-NEXT: or a1, s0, a1 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: lbu t0, 5(a0) +; RV32I-NEXT: lbu t1, 6(a0) +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a5, s1, .LBB16_14 +; RV32I-NEXT: # %bb.13: +; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: .LBB16_14: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: li ra, 7 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: beq a5, ra, .LBB16_16 +; RV32I-NEXT: # %bb.15: +; RV32I-NEXT: mv a7, a4 ; RV32I-NEXT: .LBB16_16: -; RV32I-NEXT: addi s7, a4, -64 -; RV32I-NEXT: mv s3, t2 -; RV32I-NEXT: beqz a4, .LBB16_18 +; 
RV32I-NEXT: or a3, t0, a3 +; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: andi t6, a1, 31 +; RV32I-NEXT: mv a4, a6 +; RV32I-NEXT: beqz a1, .LBB16_18 ; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a4, a7 ; RV32I-NEXT: .LBB16_18: -; RV32I-NEXT: neg a1, s7 -; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s7, t4, .LBB16_20 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a7, t1, a3 +; RV32I-NEXT: neg s3, t6 +; RV32I-NEXT: srl s5, a6, s3 +; RV32I-NEXT: beqz t6, .LBB16_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: sll a1, t3, s7 -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: bnez s7, .LBB16_21 -; RV32I-NEXT: j .LBB16_22 +; RV32I-NEXT: mv t0, s5 ; RV32I-NEXT: .LBB16_20: -; RV32I-NEXT: sll s2, t3, a4 -; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: or a1, a1, s5 -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: beqz s7, .LBB16_22 -; RV32I-NEXT: .LBB16_21: -; RV32I-NEXT: mv s4, a1 -; RV32I-NEXT: .LBB16_22: -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bltu a4, s9, .LBB16_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sll s7, a7, a1 +; RV32I-NEXT: beqz a5, .LBB16_22 +; RV32I-NEXT: # %bb.21: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: j .LBB16_25 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: bne a5, s9, .LBB16_23 +; RV32I-NEXT: j .LBB16_24 +; RV32I-NEXT: .LBB16_22: +; RV32I-NEXT: or a3, s7, t0 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beq a5, s9, .LBB16_24 +; RV32I-NEXT: .LBB16_23: +; RV32I-NEXT: mv a6, a3 ; RV32I-NEXT: .LBB16_24: -; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, a6, s1 -; RV32I-NEXT: or s4, a7, s3 -; RV32I-NEXT: .LBB16_25: -; RV32I-NEXT: sub ra, a1, a4 -; RV32I-NEXT: mv a7, t1 -; RV32I-NEXT: mv a6, t2 -; RV32I-NEXT: beqz a4, .LBB16_27 -; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: mv a7, s2 -; RV32I-NEXT: mv a6, s4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s0, .LBB16_40 +; RV32I-NEXT: # %bb.25: +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bne a5, s6, .LBB16_41 +; RV32I-NEXT: 
.LBB16_26: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s4, .LBB16_28 ; RV32I-NEXT: .LBB16_27: -; RV32I-NEXT: neg s1, ra -; RV32I-NEXT: sll s2, t2, s1 -; RV32I-NEXT: bltu ra, t4, .LBB16_29 -; RV32I-NEXT: # %bb.28: -; RV32I-NEXT: srl a1, t2, ra -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bnez ra, .LBB16_30 -; RV32I-NEXT: j .LBB16_31 -; RV32I-NEXT: .LBB16_29: -; RV32I-NEXT: or a1, s0, s2 -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz ra, .LBB16_31 +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: .LBB16_28: +; RV32I-NEXT: lbu t2, 11(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s11, .LBB16_30 +; RV32I-NEXT: # %bb.29: +; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB16_30: -; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB16_31: -; RV32I-NEXT: bltu ra, t4, .LBB16_33 -; RV32I-NEXT: # %bb.32: -; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: srl a1, a5, ra -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: bnez ra, .LBB16_34 -; RV32I-NEXT: j .LBB16_35 -; RV32I-NEXT: .LBB16_33: -; RV32I-NEXT: srl a1, t2, s10 -; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sll a1, a5, s1 -; RV32I-NEXT: or a1, t5, a1 -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: beqz ra, .LBB16_35 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu t1, 9(a0) +; RV32I-NEXT: lbu a3, 10(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a5, s1, .LBB16_32 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: mv a6, t0 +; RV32I-NEXT: .LBB16_32: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t3, 8(a0) +; RV32I-NEXT: or t2, t2, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB16_34 +; RV32I-NEXT: # %bb.33: +; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: .LBB16_34: -; RV32I-NEXT: mv t5, a1 -; RV32I-NEXT: .LBB16_35: -; RV32I-NEXT: sub s3, s9, ra -; RV32I-NEXT: bltu ra, t4, .LBB16_38 -; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s3, t4, .LBB16_39 -; RV32I-NEXT: .LBB16_37: -; RV32I-NEXT: sll s1, t1, s1 -; 
RV32I-NEXT: neg a1, s3 -; RV32I-NEXT: srl a1, t1, a1 -; RV32I-NEXT: or a1, a1, s2 -; RV32I-NEXT: j .LBB16_40 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli a6, t2, 16 +; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: beqz a1, .LBB16_36 +; RV32I-NEXT: # %bb.35: +; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: .LBB16_36: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a6, a6, a3 +; RV32I-NEXT: srl s8, a7, s3 +; RV32I-NEXT: beqz t6, .LBB16_38 +; RV32I-NEXT: # %bb.37: +; RV32I-NEXT: mv t0, s8 ; RV32I-NEXT: .LBB16_38: -; RV32I-NEXT: srl a1, a5, s10 -; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s3, t4, .LBB16_37 -; RV32I-NEXT: .LBB16_39: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sll a1, t1, s3 +; RV32I-NEXT: sll s10, a6, a1 +; RV32I-NEXT: beqz a5, .LBB16_42 +; RV32I-NEXT: # %bb.39: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_43 +; RV32I-NEXT: j .LBB16_44 ; RV32I-NEXT: .LBB16_40: -; RV32I-NEXT: addi s4, ra, -64 -; RV32I-NEXT: mv s2, t2 -; RV32I-NEXT: beqz s3, .LBB16_42 -; RV32I-NEXT: # %bb.41: -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beq a5, s6, .LBB16_26 +; RV32I-NEXT: .LBB16_41: +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s4, .LBB16_27 +; RV32I-NEXT: j .LBB16_28 ; RV32I-NEXT: .LBB16_42: -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s5, a7 -; RV32I-NEXT: bltu s4, t4, .LBB16_44 -; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: srl t0, t2, s4 -; RV32I-NEXT: j .LBB16_45 +; RV32I-NEXT: or a7, s10, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_44 +; RV32I-NEXT: .LBB16_43: +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: .LBB16_44: -; RV32I-NEXT: srl a1, t1, ra -; RV32I-NEXT: neg t0, s4 -; RV32I-NEXT: sll t0, t2, t0 -; RV32I-NEXT: or t0, a1, t0 -; RV32I-NEXT: .LBB16_45: -; RV32I-NEXT: mv s0, s10 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: lbu s8, 19(a0) -; RV32I-NEXT: lbu a1, 23(a0) -; RV32I-NEXT: mv s3, t1 -; RV32I-NEXT: beqz s4, .LBB16_47 -; RV32I-NEXT: # 
%bb.46: -; RV32I-NEXT: mv s3, t0 +; RV32I-NEXT: beq a5, s9, .LBB16_61 +; RV32I-NEXT: # %bb.45: +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne a5, s0, .LBB16_62 +; RV32I-NEXT: .LBB16_46: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: bne a5, s6, .LBB16_63 ; RV32I-NEXT: .LBB16_47: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: lbu s10, 17(a0) -; RV32I-NEXT: lbu t0, 18(a0) -; RV32I-NEXT: lbu s9, 21(a0) -; RV32I-NEXT: lbu t6, 22(a0) -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: bltu s4, t4, .LBB16_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: j .LBB16_50 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s4, .LBB16_49 +; RV32I-NEXT: .LBB16_48: +; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB16_49: -; RV32I-NEXT: srl s4, t2, ra -; RV32I-NEXT: .LBB16_50: -; RV32I-NEXT: or s11, s8, t0 -; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu s8, 20(a0) -; RV32I-NEXT: slli s10, s10, 8 -; RV32I-NEXT: slli s9, s9, 8 -; RV32I-NEXT: or t6, a1, t6 -; RV32I-NEXT: bgeu ra, a3, .LBB16_52 -; RV32I-NEXT: # %bb.51: -; RV32I-NEXT: or s3, t5, s1 -; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s4, a1, s2 -; RV32I-NEXT: .LBB16_52: -; RV32I-NEXT: or a1, s10, t0 -; RV32I-NEXT: slli s11, s11, 16 -; RV32I-NEXT: or t0, s9, s8 -; RV32I-NEXT: slli t6, t6, 16 -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: mv s1, a5 -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: beqz ra, .LBB16_54 -; RV32I-NEXT: # %bb.53: -; RV32I-NEXT: mv t5, s3 -; RV32I-NEXT: mv s1, s4 -; RV32I-NEXT: .LBB16_54: -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, s11, a1 -; RV32I-NEXT: or s1, t6, t0 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: mv a6, a7 -; RV32I-NEXT: mv a7, s0 -; RV32I-NEXT: bltu ra, a1, .LBB16_56 -; RV32I-NEXT: # %bb.55: -; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB16_56: -; RV32I-NEXT: srl s3, s2, a7 -; RV32I-NEXT: sll ra, s1, a4 -; RV32I-NEXT: mv a7, s5 -; RV32I-NEXT: 
sw t5, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t4, .LBB16_58 -; RV32I-NEXT: # %bb.57: -; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sll a1, s2, a4 -; RV32I-NEXT: j .LBB16_59 -; RV32I-NEXT: .LBB16_58: -; RV32I-NEXT: sll a1, s2, a4 -; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: or a1, s3, ra +; RV32I-NEXT: lbu t3, 15(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s11, .LBB16_51 +; RV32I-NEXT: # %bb.50: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB16_51: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu t1, 13(a0) +; RV32I-NEXT: lbu a3, 14(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: beq a5, s1, .LBB16_53 +; RV32I-NEXT: # %bb.52: +; RV32I-NEXT: mv a7, t0 +; RV32I-NEXT: .LBB16_53: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 12(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB16_55 +; RV32I-NEXT: # %bb.54: +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: .LBB16_55: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv s2, a6 +; RV32I-NEXT: beqz a1, .LBB16_57 +; RV32I-NEXT: # %bb.56: +; RV32I-NEXT: mv s2, t0 +; RV32I-NEXT: .LBB16_57: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a7, t3, a3 +; RV32I-NEXT: srl a3, a6, s3 +; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t6, .LBB16_59 +; RV32I-NEXT: # %bb.58: +; RV32I-NEXT: lw t0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB16_59: -; RV32I-NEXT: lbu s9, 27(a0) -; RV32I-NEXT: lbu t6, 31(a0) -; RV32I-NEXT: mv t5, s1 -; RV32I-NEXT: beqz a4, .LBB16_61 +; RV32I-NEXT: sll a3, a7, a1 +; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a5, .LBB16_64 ; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: mv t5, a1 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_65 +; RV32I-NEXT: j .LBB16_66 ; RV32I-NEXT: .LBB16_61: -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s4, 26(a0) -; RV32I-NEXT: lbu s11, 29(a0) -; RV32I-NEXT: lbu s10, 30(a0) -; RV32I-NEXT: slli s9, s9, 8 
-; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: bltu s6, t4, .LBB16_63 -; RV32I-NEXT: # %bb.62: -; RV32I-NEXT: srl t0, s1, s6 -; RV32I-NEXT: j .LBB16_64 +; RV32I-NEXT: or a7, s7, a3 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: beq a5, s0, .LBB16_46 +; RV32I-NEXT: .LBB16_62: +; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: beq a5, s6, .LBB16_47 ; RV32I-NEXT: .LBB16_63: -; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, s1, a1 -; RV32I-NEXT: or t0, s3, a1 +; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s4, .LBB16_48 +; RV32I-NEXT: j .LBB16_49 ; RV32I-NEXT: .LBB16_64: -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: lbu s3, 24(a0) -; RV32I-NEXT: lbu a1, 28(a0) -; RV32I-NEXT: or s4, s9, s4 -; RV32I-NEXT: slli s11, s11, 8 -; RV32I-NEXT: or t6, t6, s10 -; RV32I-NEXT: mv s9, s2 -; RV32I-NEXT: beqz s6, .LBB16_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: mv s9, t0 +; RV32I-NEXT: or a6, a3, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_66 +; RV32I-NEXT: .LBB16_65: +; RV32I-NEXT: mv a3, s8 ; RV32I-NEXT: .LBB16_66: -; RV32I-NEXT: or a0, s8, s3 -; RV32I-NEXT: slli t0, s4, 16 -; RV32I-NEXT: or a1, s11, a1 -; RV32I-NEXT: slli t6, t6, 16 -; RV32I-NEXT: bltu s6, t4, .LBB16_68 +; RV32I-NEXT: beq a5, s9, .LBB16_84 ; RV32I-NEXT: # %bb.67: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: j .LBB16_69 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_85 ; RV32I-NEXT: .LBB16_68: -; RV32I-NEXT: srl s4, s1, s0 +; RV32I-NEXT: beq a5, s0, .LBB16_86 ; RV32I-NEXT: .LBB16_69: -; RV32I-NEXT: li s11, 64 -; RV32I-NEXT: or s6, t0, a0 -; RV32I-NEXT: or a0, t6, a1 -; RV32I-NEXT: bltu a4, t4, .LBB16_71 -; RV32I-NEXT: # %bb.70: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a1, s6, a4 -; RV32I-NEXT: mv s10, a0 -; RV32I-NEXT: bnez a4, .LBB16_72 -; RV32I-NEXT: j .LBB16_73 +; RV32I-NEXT: mv t0, t5 +; RV32I-NEXT: bne a5, s6, .LBB16_87 +; RV32I-NEXT: .LBB16_70: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s4, .LBB16_72 ; RV32I-NEXT: .LBB16_71: -; 
RV32I-NEXT: sll s3, s6, a4 -; RV32I-NEXT: srl a1, s6, s0 -; RV32I-NEXT: sll t0, a0, a4 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: mv s10, a0 -; RV32I-NEXT: beqz a4, .LBB16_73 +; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB16_72: -; RV32I-NEXT: mv s10, a1 -; RV32I-NEXT: .LBB16_73: -; RV32I-NEXT: bltu s7, t4, .LBB16_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: sll a1, s2, s7 -; RV32I-NEXT: mv s0, s1 -; RV32I-NEXT: bnez s7, .LBB16_76 -; RV32I-NEXT: j .LBB16_77 -; RV32I-NEXT: .LBB16_75: -; RV32I-NEXT: sll s5, s2, a4 -; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a1, s2, a1 -; RV32I-NEXT: or a1, a1, ra -; RV32I-NEXT: mv s0, s1 -; RV32I-NEXT: beqz s7, .LBB16_77 +; RV32I-NEXT: lbu t3, 19(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s11, .LBB16_74 +; RV32I-NEXT: # %bb.73: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB16_74: +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: lbu a3, 18(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: beq a5, s1, .LBB16_76 +; RV32I-NEXT: # %bb.75: +; RV32I-NEXT: mv a6, t0 ; RV32I-NEXT: .LBB16_76: -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: .LBB16_77: -; RV32I-NEXT: bltu a4, s11, .LBB16_79 -; RV32I-NEXT: # %bb.78: -; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: li t5, 0 -; RV32I-NEXT: j .LBB16_80 -; RV32I-NEXT: .LBB16_79: -; RV32I-NEXT: or s5, s9, s3 -; RV32I-NEXT: or s0, s4, s10 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 16(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB16_78 +; RV32I-NEXT: # %bb.77: +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: .LBB16_78: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv s6, a7 +; RV32I-NEXT: beqz a1, .LBB16_80 +; RV32I-NEXT: # %bb.79: +; RV32I-NEXT: mv s6, t0 ; RV32I-NEXT: .LBB16_80: -; RV32I-NEXT: addi s9, a4, -128 -; RV32I-NEXT: mv s7, s6 -; RV32I-NEXT: mv s8, a0 -; RV32I-NEXT: beqz a4, .LBB16_82 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a6, t3, a3 +; 
RV32I-NEXT: srl a3, a7, s3 +; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t6, .LBB16_82 ; RV32I-NEXT: # %bb.81: -; RV32I-NEXT: mv s7, s5 -; RV32I-NEXT: mv s8, s0 +; RV32I-NEXT: lw t0, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB16_82: -; RV32I-NEXT: neg s3, s9 -; RV32I-NEXT: srl s0, t3, s3 -; RV32I-NEXT: bltu s9, t4, .LBB16_84 +; RV32I-NEXT: sll a3, a6, a1 +; RV32I-NEXT: sw a3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a5, .LBB16_88 ; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: sll a1, t3, s9 -; RV32I-NEXT: j .LBB16_85 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_89 +; RV32I-NEXT: j .LBB16_90 ; RV32I-NEXT: .LBB16_84: -; RV32I-NEXT: sll s5, t3, a4 -; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, s0, a1 +; RV32I-NEXT: or a6, s10, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_68 ; RV32I-NEXT: .LBB16_85: -; RV32I-NEXT: sub s4, s11, s9 -; RV32I-NEXT: mv t6, a5 -; RV32I-NEXT: beqz s9, .LBB16_87 -; RV32I-NEXT: # %bb.86: -; RV32I-NEXT: mv t6, a1 +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: bne a5, s0, .LBB16_69 +; RV32I-NEXT: .LBB16_86: +; RV32I-NEXT: or a6, s7, a3 +; RV32I-NEXT: mv t0, t5 +; RV32I-NEXT: beq a5, s6, .LBB16_70 ; RV32I-NEXT: .LBB16_87: -; RV32I-NEXT: bltu s4, t4, .LBB16_89 -; RV32I-NEXT: # %bb.88: -; RV32I-NEXT: srl a1, a5, s4 -; RV32I-NEXT: mv s0, t3 -; RV32I-NEXT: bnez s4, .LBB16_90 -; RV32I-NEXT: j .LBB16_91 +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s4, .LBB16_71 +; RV32I-NEXT: j .LBB16_72 +; RV32I-NEXT: .LBB16_88: +; RV32I-NEXT: or a7, a3, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_90 ; RV32I-NEXT: .LBB16_89: -; RV32I-NEXT: neg a1, s4 -; RV32I-NEXT: sll a1, a5, a1 -; RV32I-NEXT: or a1, s0, a1 -; RV32I-NEXT: mv s0, t3 -; RV32I-NEXT: beqz s4, .LBB16_91 +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB16_90: -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: .LBB16_91: -; RV32I-NEXT: 
bltu s4, t4, .LBB16_94 -; RV32I-NEXT: # %bb.92: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li ra, 64 -; RV32I-NEXT: bgeu s9, t4, .LBB16_95 +; RV32I-NEXT: beq a5, s9, .LBB16_109 +; RV32I-NEXT: # %bb.91: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_110 +; RV32I-NEXT: .LBB16_92: +; RV32I-NEXT: beq a5, s0, .LBB16_111 ; RV32I-NEXT: .LBB16_93: -; RV32I-NEXT: sll s10, t1, a4 -; RV32I-NEXT: srl a1, t1, s3 -; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: j .LBB16_96 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_112 ; RV32I-NEXT: .LBB16_94: -; RV32I-NEXT: srl s4, a5, s3 -; RV32I-NEXT: li ra, 64 -; RV32I-NEXT: bltu s9, t4, .LBB16_93 +; RV32I-NEXT: li t0, 3 +; RV32I-NEXT: beq a5, t0, .LBB16_113 ; RV32I-NEXT: .LBB16_95: -; RV32I-NEXT: li s10, 0 -; RV32I-NEXT: sll a1, t1, s9 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: beq a5, s4, .LBB16_97 ; RV32I-NEXT: .LBB16_96: -; RV32I-NEXT: addi s11, s9, -64 -; RV32I-NEXT: mv s3, t2 -; RV32I-NEXT: beqz s9, .LBB16_98 -; RV32I-NEXT: # %bb.97: -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: .LBB16_98: -; RV32I-NEXT: bltu s11, t4, .LBB16_100 -; RV32I-NEXT: # %bb.99: +; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: .LBB16_97: +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s11, .LBB16_99 +; RV32I-NEXT: # %bb.98: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB16_99: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu t1, 21(a0) +; RV32I-NEXT: lbu a3, 22(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: beq a5, s1, .LBB16_101 +; RV32I-NEXT: # %bb.100: +; RV32I-NEXT: mv a7, t0 +; RV32I-NEXT: .LBB16_101: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 20(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB16_103 +; RV32I-NEXT: # %bb.102: +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: .LBB16_103: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv s9, a6 +; RV32I-NEXT: beqz a1, .LBB16_105 +; RV32I-NEXT: # %bb.104: +; RV32I-NEXT: mv s9, 
t0 +; RV32I-NEXT: .LBB16_105: +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: or t0, t3, a3 +; RV32I-NEXT: srl a6, a6, s3 +; RV32I-NEXT: beqz t6, .LBB16_107 +; RV32I-NEXT: # %bb.106: +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: .LBB16_107: +; RV32I-NEXT: sll a7, t0, a1 +; RV32I-NEXT: beqz a5, .LBB16_114 +; RV32I-NEXT: # %bb.108: +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_115 +; RV32I-NEXT: j .LBB16_116 +; RV32I-NEXT: .LBB16_109: +; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a7, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_92 +; RV32I-NEXT: .LBB16_110: +; RV32I-NEXT: mv a3, s8 +; RV32I-NEXT: bne a5, s0, .LBB16_93 +; RV32I-NEXT: .LBB16_111: +; RV32I-NEXT: or a7, s10, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_94 +; RV32I-NEXT: .LBB16_112: +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: li t0, 3 +; RV32I-NEXT: bne a5, t0, .LBB16_95 +; RV32I-NEXT: .LBB16_113: +; RV32I-NEXT: or a7, s7, a3 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne a5, s4, .LBB16_96 +; RV32I-NEXT: j .LBB16_97 +; RV32I-NEXT: .LBB16_114: +; RV32I-NEXT: or t1, a7, t1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_116 +; RV32I-NEXT: .LBB16_115: +; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: .LBB16_116: +; RV32I-NEXT: li t3, 1 +; RV32I-NEXT: beq a5, t3, .LBB16_136 +; RV32I-NEXT: # %bb.117: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_137 +; RV32I-NEXT: .LBB16_118: +; RV32I-NEXT: beq a5, s0, .LBB16_138 +; RV32I-NEXT: .LBB16_119: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_139 +; RV32I-NEXT: .LBB16_120: +; RV32I-NEXT: li t3, 3 +; RV32I-NEXT: beq a5, t3, .LBB16_140 +; RV32I-NEXT: .LBB16_121: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_141 +; RV32I-NEXT: .LBB16_122: +; RV32I-NEXT: bne a5, s4, .LBB16_124 +; RV32I-NEXT: .LBB16_123: +; RV32I-NEXT: or t1, s7, a3 +; RV32I-NEXT: .LBB16_124: +; RV32I-NEXT: sw t2, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu s0, 27(a0) +; RV32I-NEXT: mv t3, 
t5 +; RV32I-NEXT: beq a5, s11, .LBB16_126 +; RV32I-NEXT: # %bb.125: +; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: .LBB16_126: +; RV32I-NEXT: mv t2, s10 +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: lbu t4, 25(a0) +; RV32I-NEXT: lbu a3, 26(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: beq a5, s1, .LBB16_128 +; RV32I-NEXT: # %bb.127: +; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: .LBB16_128: +; RV32I-NEXT: mv s10, s8 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s1, 24(a0) +; RV32I-NEXT: or s0, s0, a3 +; RV32I-NEXT: slli a3, t4, 8 +; RV32I-NEXT: beq a5, ra, .LBB16_130 +; RV32I-NEXT: # %bb.129: +; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: .LBB16_130: +; RV32I-NEXT: li s8, 4 +; RV32I-NEXT: or a3, a3, s1 +; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: mv ra, t0 +; RV32I-NEXT: beqz a1, .LBB16_132 +; RV32I-NEXT: # %bb.131: +; RV32I-NEXT: mv ra, t3 +; RV32I-NEXT: .LBB16_132: +; RV32I-NEXT: li s4, 5 +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: or t3, s0, a3 +; RV32I-NEXT: srl t0, t0, s3 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: beqz t6, .LBB16_134 +; RV32I-NEXT: # %bb.133: +; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: .LBB16_134: +; RV32I-NEXT: mv s11, a4 +; RV32I-NEXT: sll t1, t3, a1 +; RV32I-NEXT: li s0, 2 +; RV32I-NEXT: mv a4, s7 +; RV32I-NEXT: beqz a5, .LBB16_142 +; RV32I-NEXT: # %bb.135: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: j .LBB16_143 +; RV32I-NEXT: .LBB16_136: +; RV32I-NEXT: lw t1, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t1, t1, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_118 +; RV32I-NEXT: .LBB16_137: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s0, .LBB16_119 +; RV32I-NEXT: .LBB16_138: +; RV32I-NEXT: lw t1, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t1, t1, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_120 +; RV32I-NEXT: .LBB16_139: +; RV32I-NEXT: mv a3, s8 +; RV32I-NEXT: li t3, 3 +; RV32I-NEXT: bne a5, t3, .LBB16_121 +; RV32I-NEXT: .LBB16_140: +; RV32I-NEXT: or t1, s10, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_122 +; 
RV32I-NEXT: .LBB16_141: +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: beq a5, s4, .LBB16_123 +; RV32I-NEXT: j .LBB16_124 +; RV32I-NEXT: .LBB16_142: +; RV32I-NEXT: or t4, t1, t4 +; RV32I-NEXT: .LBB16_143: +; RV32I-NEXT: mv s7, s5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_145 +; RV32I-NEXT: # %bb.144: +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: .LBB16_145: +; RV32I-NEXT: li s5, 1 +; RV32I-NEXT: bne a5, s5, .LBB16_147 +; RV32I-NEXT: # %bb.146: +; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: .LBB16_147: +; RV32I-NEXT: mv s5, s7 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_166 +; RV32I-NEXT: # %bb.148: +; RV32I-NEXT: beq a5, s0, .LBB16_167 +; RV32I-NEXT: .LBB16_149: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_168 +; RV32I-NEXT: .LBB16_150: +; RV32I-NEXT: li s0, 3 +; RV32I-NEXT: beq a5, s0, .LBB16_169 +; RV32I-NEXT: .LBB16_151: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_170 +; RV32I-NEXT: .LBB16_152: +; RV32I-NEXT: beq a5, s8, .LBB16_171 +; RV32I-NEXT: .LBB16_153: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_172 +; RV32I-NEXT: .LBB16_154: +; RV32I-NEXT: li s8, 2 +; RV32I-NEXT: bne a5, s4, .LBB16_156 +; RV32I-NEXT: .LBB16_155: +; RV32I-NEXT: or t4, a4, a3 +; RV32I-NEXT: .LBB16_156: +; RV32I-NEXT: lbu s0, 31(a0) +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: beq a5, s1, .LBB16_158 +; RV32I-NEXT: # %bb.157: +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: .LBB16_158: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: lbu s5, 29(a0) +; RV32I-NEXT: lbu s1, 30(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: li s4, 7 +; RV32I-NEXT: beq a5, s4, .LBB16_160 +; RV32I-NEXT: # %bb.159: +; RV32I-NEXT: mv t4, a3 +; RV32I-NEXT: .LBB16_160: +; RV32I-NEXT: lbu a3, 28(a0) +; RV32I-NEXT: slli s5, s5, 8 +; RV32I-NEXT: or s0, s0, s1 +; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: beqz a1, .LBB16_162 +; RV32I-NEXT: # %bb.161: +; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: .LBB16_162: ; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: sll a1, t3, s11 -; RV32I-NEXT: bnez s11, .LBB16_101 -; RV32I-NEXT: j 
.LBB16_102 -; RV32I-NEXT: .LBB16_100: -; RV32I-NEXT: sll t4, t3, s9 -; RV32I-NEXT: neg a1, s11 -; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: sll t0, a5, s9 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: beqz s11, .LBB16_102 -; RV32I-NEXT: .LBB16_101: -; RV32I-NEXT: mv a5, a1 -; RV32I-NEXT: .LBB16_102: -; RV32I-NEXT: bltu s9, ra, .LBB16_104 -; RV32I-NEXT: # %bb.103: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bnez s9, .LBB16_105 -; RV32I-NEXT: j .LBB16_106 -; RV32I-NEXT: .LBB16_104: -; RV32I-NEXT: or t4, s0, s10 -; RV32I-NEXT: or a5, s4, s3 -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: beqz s9, .LBB16_106 -; RV32I-NEXT: .LBB16_105: -; RV32I-NEXT: mv t1, t4 -; RV32I-NEXT: mv t2, a5 -; RV32I-NEXT: .LBB16_106: -; RV32I-NEXT: bltu a4, a1, .LBB16_108 -; RV32I-NEXT: # %bb.107: -; RV32I-NEXT: li ra, 0 +; RV32I-NEXT: or a3, s5, a3 +; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: li s4, 4 +; RV32I-NEXT: beqz t6, .LBB16_164 +; RV32I-NEXT: # %bb.163: +; RV32I-NEXT: srl t4, t3, s3 +; RV32I-NEXT: .LBB16_164: +; RV32I-NEXT: or s3, s0, a3 +; RV32I-NEXT: li s0, 5 +; RV32I-NEXT: beqz a5, .LBB16_173 +; RV32I-NEXT: # %bb.165: +; RV32I-NEXT: li t3, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: bnez a4, .LBB16_109 -; RV32I-NEXT: j .LBB16_110 -; RV32I-NEXT: .LBB16_108: -; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s5, a1, a5 -; RV32I-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t6, a1, t5 -; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t1, a1, s7 -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t2, a1, s8 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: beqz a4, .LBB16_110 -; RV32I-NEXT: .LBB16_109: -; RV32I-NEXT: mv s2, s5 -; RV32I-NEXT: mv s1, t6 -; RV32I-NEXT: mv s6, t1 -; RV32I-NEXT: mv a0, t2 -; RV32I-NEXT: .LBB16_110: -; RV32I-NEXT: 
srli a4, ra, 16 -; RV32I-NEXT: lui t2, 16 -; RV32I-NEXT: srli t1, ra, 24 -; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: srli t4, a3, 24 -; RV32I-NEXT: srli t0, a7, 16 -; RV32I-NEXT: srli s0, a7, 24 -; RV32I-NEXT: srli t3, a6, 16 -; RV32I-NEXT: srli s3, a6, 24 -; RV32I-NEXT: srli t6, s2, 16 -; RV32I-NEXT: srli a1, s2, 24 -; RV32I-NEXT: srli t5, s1, 16 -; RV32I-NEXT: srli s5, s1, 24 -; RV32I-NEXT: srli s4, s6, 16 -; RV32I-NEXT: srli s7, s6, 24 -; RV32I-NEXT: srli s8, a0, 16 -; RV32I-NEXT: srli s9, a0, 24 -; RV32I-NEXT: addi t2, t2, -1 -; RV32I-NEXT: and s10, ra, t2 -; RV32I-NEXT: and s11, a3, t2 +; RV32I-NEXT: bnez t6, .LBB16_174 +; RV32I-NEXT: j .LBB16_175 +; RV32I-NEXT: .LBB16_166: +; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s0, .LBB16_149 +; RV32I-NEXT: .LBB16_167: +; RV32I-NEXT: lw t4, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t4, t4, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_150 +; RV32I-NEXT: .LBB16_168: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: li s0, 3 +; RV32I-NEXT: bne a5, s0, .LBB16_151 +; RV32I-NEXT: .LBB16_169: +; RV32I-NEXT: lw t4, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t4, t4, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_152 +; RV32I-NEXT: .LBB16_170: +; RV32I-NEXT: mv a3, s10 +; RV32I-NEXT: bne a5, s8, .LBB16_153 +; RV32I-NEXT: .LBB16_171: +; RV32I-NEXT: or t4, t2, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_154 +; RV32I-NEXT: .LBB16_172: +; RV32I-NEXT: mv a3, s5 +; RV32I-NEXT: li s8, 2 +; RV32I-NEXT: beq a5, s4, .LBB16_155 +; RV32I-NEXT: j .LBB16_156 +; RV32I-NEXT: .LBB16_173: +; RV32I-NEXT: sll a3, s3, a1 +; RV32I-NEXT: or t3, a3, t4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_175 +; RV32I-NEXT: .LBB16_174: +; RV32I-NEXT: mv a3, t0 +; RV32I-NEXT: .LBB16_175: +; RV32I-NEXT: li t0, 1 +; RV32I-NEXT: beq a5, t0, .LBB16_195 +; RV32I-NEXT: # %bb.176: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_196 +; RV32I-NEXT: 
.LBB16_177: +; RV32I-NEXT: bne a5, s8, .LBB16_179 +; RV32I-NEXT: .LBB16_178: +; RV32I-NEXT: or t3, a7, a3 +; RV32I-NEXT: .LBB16_179: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a6, 3 +; RV32I-NEXT: bnez t6, .LBB16_197 +; RV32I-NEXT: # %bb.180: +; RV32I-NEXT: beq a5, a6, .LBB16_198 +; RV32I-NEXT: .LBB16_181: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_199 +; RV32I-NEXT: .LBB16_182: +; RV32I-NEXT: beq a5, s4, .LBB16_200 +; RV32I-NEXT: .LBB16_183: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB16_201 +; RV32I-NEXT: .LBB16_184: +; RV32I-NEXT: bne a5, s0, .LBB16_186 +; RV32I-NEXT: .LBB16_185: +; RV32I-NEXT: or t3, t2, a3 +; RV32I-NEXT: .LBB16_186: +; RV32I-NEXT: lw t2, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_188 +; RV32I-NEXT: # %bb.187: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: .LBB16_188: +; RV32I-NEXT: bne a5, s1, .LBB16_190 +; RV32I-NEXT: # %bb.189: +; RV32I-NEXT: or t3, a4, a3 +; RV32I-NEXT: .LBB16_190: +; RV32I-NEXT: mv a4, s11 +; RV32I-NEXT: li a3, 7 +; RV32I-NEXT: beq a5, a3, .LBB16_192 +; RV32I-NEXT: # %bb.191: +; RV32I-NEXT: mv t5, t3 +; RV32I-NEXT: .LBB16_192: +; RV32I-NEXT: beqz a1, .LBB16_194 +; RV32I-NEXT: # %bb.193: +; RV32I-NEXT: mv s3, t5 +; RV32I-NEXT: .LBB16_194: +; RV32I-NEXT: srli a1, a4, 16 +; RV32I-NEXT: lui a7, 16 +; RV32I-NEXT: srli a6, a4, 24 +; RV32I-NEXT: srli a3, t2, 16 +; RV32I-NEXT: srli t1, t2, 24 +; RV32I-NEXT: srli a5, s2, 16 +; RV32I-NEXT: srli t5, s2, 24 +; RV32I-NEXT: srli t0, s6, 16 +; RV32I-NEXT: srli t6, s6, 24 +; RV32I-NEXT: srli t4, s9, 16 +; RV32I-NEXT: srli s4, s9, 24 +; RV32I-NEXT: srli t3, ra, 16 +; RV32I-NEXT: srli s1, ra, 24 +; RV32I-NEXT: srli s0, a0, 16 +; RV32I-NEXT: srli s5, a0, 24 +; RV32I-NEXT: srli s7, s3, 16 +; RV32I-NEXT: srli s8, s3, 24 +; RV32I-NEXT: addi a7, a7, -1 +; RV32I-NEXT: and s10, a4, a7 +; RV32I-NEXT: and s11, t2, a7 ; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb ra, 0(a2) +; RV32I-NEXT: sb a4, 0(a2) ; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: sb 
a4, 2(a2) -; RV32I-NEXT: sb t1, 3(a2) -; RV32I-NEXT: and a4, a7, t2 -; RV32I-NEXT: srli t1, s11, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb t1, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t4, 7(a2) -; RV32I-NEXT: and a3, a6, t2 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 8(a2) -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t0, 10(a2) -; RV32I-NEXT: sb s0, 11(a2) -; RV32I-NEXT: and a4, s2, t2 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a6, 3(a2) +; RV32I-NEXT: and a1, s2, a7 +; RV32I-NEXT: srli a4, s11, 8 +; RV32I-NEXT: sb t2, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) +; RV32I-NEXT: sb t1, 7(a2) +; RV32I-NEXT: and a3, s6, a7 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb s2, 8(a2) +; RV32I-NEXT: sb a1, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb t5, 11(a2) +; RV32I-NEXT: and a1, s9, a7 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb s6, 12(a2) ; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb t3, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a3, s1, t2 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s2, 16(a2) -; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: sb t6, 18(a2) -; RV32I-NEXT: sb a1, 19(a2) -; RV32I-NEXT: and a1, s6, t2 -; RV32I-NEXT: and a4, a0, t2 +; RV32I-NEXT: sb t0, 14(a2) +; RV32I-NEXT: sb t6, 15(a2) +; RV32I-NEXT: and a3, ra, a7 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb s9, 16(a2) +; RV32I-NEXT: sb a1, 17(a2) +; RV32I-NEXT: sb t4, 18(a2) +; RV32I-NEXT: sb s4, 19(a2) +; RV32I-NEXT: and a1, a0, a7 +; RV32I-NEXT: and a4, s3, a7 ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s1, 20(a2) +; RV32I-NEXT: sb ra, 20(a2) ; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t5, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) -; RV32I-NEXT: sb s6, 24(a2) +; RV32I-NEXT: sb t3, 22(a2) +; RV32I-NEXT: sb s1, 23(a2) +; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s7, 27(a2) -; RV32I-NEXT: 
sb a0, 28(a2) +; RV32I-NEXT: sb s0, 26(a2) +; RV32I-NEXT: sb s5, 27(a2) +; RV32I-NEXT: sb s3, 28(a2) ; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s8, 30(a2) -; RV32I-NEXT: sb s9, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: sb s7, 30(a2) +; RV32I-NEXT: sb s8, 31(a2) +; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret +; RV32I-NEXT: .LBB16_195: +; RV32I-NEXT: or t3, t1, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_177 +; RV32I-NEXT: .LBB16_196: +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: beq a5, s8, .LBB16_178 +; RV32I-NEXT: j .LBB16_179 +; RV32I-NEXT: .LBB16_197: +; RV32I-NEXT: lw a3, 16(sp) # 4-byte Folded Reload +; 
RV32I-NEXT: bne a5, a6, .LBB16_181 +; RV32I-NEXT: .LBB16_198: +; RV32I-NEXT: lw a6, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_182 +; RV32I-NEXT: .LBB16_199: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s4, .LBB16_183 +; RV32I-NEXT: .LBB16_200: +; RV32I-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB16_184 +; RV32I-NEXT: .LBB16_201: +; RV32I-NEXT: mv a3, s10 +; RV32I-NEXT: beq a5, s0, .LBB16_185 +; RV32I-NEXT: j .LBB16_186 %src = load i256, ptr %src.ptr, align 1 %wordOff = load i256, ptr %wordOff.ptr, align 1 %bitOff = shl i256 %wordOff, 5 @@ -7581,605 +8677,809 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; ; RV32I-LABEL: shl_32bytes_dwordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw 
s5, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu a4, 0(a0) ; RV32I-NEXT: lbu a5, 2(a0) -; RV32I-NEXT: lbu a7, 3(a0) -; RV32I-NEXT: lbu t0, 4(a0) -; RV32I-NEXT: lbu t1, 5(a0) -; RV32I-NEXT: lbu t2, 6(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: or a4, a4, a3 -; RV32I-NEXT: lbu a6, 11(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or a5, a7, a5 -; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: or t0, t3, t2 -; RV32I-NEXT: lbu t1, 1(a1) -; RV32I-NEXT: lbu t2, 0(a1) -; RV32I-NEXT: lbu t3, 2(a1) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t1, t1, t2 -; RV32I-NEXT: li s9, 64 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: slli a7, a7, 8 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, t3 -; RV32I-NEXT: li t4, 32 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or t3, a5, a4 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, a1, t1 -; RV32I-NEXT: slli a4, a4, 6 -; RV32I-NEXT: neg s10, a4 -; RV32I-NEXT: srl t5, t3, s10 -; RV32I-NEXT: sll s5, a5, a4 -; RV32I-NEXT: bltu a4, t4, .LBB17_2 +; RV32I-NEXT: or a6, a5, a3 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a1, a1, 6 +; RV32I-NEXT: srli a5, a1, 5 +; RV32I-NEXT: sll t5, a6, a1 +; RV32I-NEXT: li s5, 1 +; RV32I-NEXT: mv a4, 
t5 +; RV32I-NEXT: beqz a5, .LBB17_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: sll a7, t3, a4 -; RV32I-NEXT: j .LBB17_3 +; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sll s8, t3, a4 -; RV32I-NEXT: or a7, t5, s5 -; RV32I-NEXT: .LBB17_3: -; RV32I-NEXT: lbu t2, 9(a0) -; RV32I-NEXT: lbu a1, 10(a0) -; RV32I-NEXT: lbu t1, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t6, a3, 8 -; RV32I-NEXT: sub s6, s9, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a4, .LBB17_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: .LBB17_5: -; RV32I-NEXT: slli a7, t2, 8 -; RV32I-NEXT: or a6, a6, a1 -; RV32I-NEXT: lbu t2, 8(a0) -; RV32I-NEXT: lbu a1, 12(a0) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t6, t0 -; RV32I-NEXT: neg t6, s6 -; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s6, t4, .LBB17_7 -; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srl t6, a5, s6 -; RV32I-NEXT: j .LBB17_8 -; RV32I-NEXT: .LBB17_7: -; RV32I-NEXT: sll t6, a5, t6 -; RV32I-NEXT: or t6, t5, t6 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li s0, 2 +; RV32I-NEXT: beq a5, s5, .LBB17_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: .LBB17_4: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: beq a5, s0, .LBB17_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: .LBB17_6: +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li s8, 3 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: beq a5, s8, .LBB17_8 +; RV32I-NEXT: # %bb.7: +; RV32I-NEXT: mv a4, a7 ; RV32I-NEXT: .LBB17_8: -; RV32I-NEXT: or a7, a7, t2 -; RV32I-NEXT: slli t2, a6, 16 -; RV32I-NEXT: or a1, t1, a1 -; RV32I-NEXT: slli t0, t0, 16 -; RV32I-NEXT: mv a6, t3 -; RV32I-NEXT: beqz s6, .LBB17_10 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: li s1, 5 +; RV32I-NEXT: beq a5, s10, .LBB17_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a6, t6 +; RV32I-NEXT: mv t0, a4 ; RV32I-NEXT: .LBB17_10: -; RV32I-NEXT: or t1, t2, a7 -; RV32I-NEXT: or t2, t0, a1 -; RV32I-NEXT: bltu s6, t4, .LBB17_12 
-; RV32I-NEXT: # %bb.11: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: j .LBB17_13 +; RV32I-NEXT: lbu t1, 7(a0) +; RV32I-NEXT: li s6, 6 +; RV32I-NEXT: beq a5, s1, .LBB17_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: mv a7, t0 ; RV32I-NEXT: .LBB17_12: -; RV32I-NEXT: srl a7, a5, s10 -; RV32I-NEXT: .LBB17_13: -; RV32I-NEXT: srl s0, t1, s10 -; RV32I-NEXT: sll a1, t2, a4 -; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t4, .LBB17_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sll a1, t1, a4 -; RV32I-NEXT: j .LBB17_16 -; RV32I-NEXT: .LBB17_15: -; RV32I-NEXT: sll s1, t1, a4 -; RV32I-NEXT: or a1, s0, a1 +; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: lbu t0, 5(a0) +; RV32I-NEXT: lbu a3, 6(a0) +; RV32I-NEXT: slli t2, t1, 8 +; RV32I-NEXT: beq a5, s6, .LBB17_14 +; RV32I-NEXT: # %bb.13: +; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: .LBB17_14: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu t1, 4(a0) +; RV32I-NEXT: or t2, t2, a3 +; RV32I-NEXT: li ra, 7 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: beq a5, ra, .LBB17_16 +; RV32I-NEXT: # %bb.15: +; RV32I-NEXT: mv a7, a4 ; RV32I-NEXT: .LBB17_16: -; RV32I-NEXT: addi s7, a4, -64 -; RV32I-NEXT: mv s3, t2 -; RV32I-NEXT: beqz a4, .LBB17_18 +; RV32I-NEXT: or a3, t0, t1 +; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: andi t6, a1, 31 +; RV32I-NEXT: mv a4, a6 +; RV32I-NEXT: beqz a1, .LBB17_18 ; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a4, a7 ; RV32I-NEXT: .LBB17_18: -; RV32I-NEXT: neg a1, s7 -; RV32I-NEXT: sw a1, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s7, t4, .LBB17_20 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a7, t2, a3 +; RV32I-NEXT: neg s3, t6 +; RV32I-NEXT: srl s4, a6, s3 +; RV32I-NEXT: beqz t6, .LBB17_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: sll a1, t3, s7 -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: bnez s7, .LBB17_21 -; RV32I-NEXT: j .LBB17_22 +; RV32I-NEXT: mv t0, s4 ; RV32I-NEXT: .LBB17_20: -; RV32I-NEXT: sll s2, t3, a4 -; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: 
or a1, a1, s5 -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: beqz s7, .LBB17_22 -; RV32I-NEXT: .LBB17_21: -; RV32I-NEXT: mv s4, a1 -; RV32I-NEXT: .LBB17_22: -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bltu a4, s9, .LBB17_24 -; RV32I-NEXT: # %bb.23: -; RV32I-NEXT: sw zero, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sll s9, a7, a1 +; RV32I-NEXT: beqz a5, .LBB17_22 +; RV32I-NEXT: # %bb.21: ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: j .LBB17_25 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: bne a5, s5, .LBB17_23 +; RV32I-NEXT: j .LBB17_24 +; RV32I-NEXT: .LBB17_22: +; RV32I-NEXT: or a3, s9, t0 +; RV32I-NEXT: mv a6, t5 +; RV32I-NEXT: beq a5, s5, .LBB17_24 +; RV32I-NEXT: .LBB17_23: +; RV32I-NEXT: mv a6, a3 ; RV32I-NEXT: .LBB17_24: -; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, a6, s1 -; RV32I-NEXT: or s4, a7, s3 -; RV32I-NEXT: .LBB17_25: -; RV32I-NEXT: sub ra, a1, a4 -; RV32I-NEXT: mv a7, t1 -; RV32I-NEXT: mv a6, t2 -; RV32I-NEXT: beqz a4, .LBB17_27 -; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: mv a7, s2 -; RV32I-NEXT: mv a6, s4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s0, .LBB17_40 +; RV32I-NEXT: # %bb.25: +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: bne a5, s8, .LBB17_41 +; RV32I-NEXT: .LBB17_26: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s10, .LBB17_28 ; RV32I-NEXT: .LBB17_27: -; RV32I-NEXT: neg s1, ra -; RV32I-NEXT: sll s2, t2, s1 -; RV32I-NEXT: bltu ra, t4, .LBB17_29 -; RV32I-NEXT: # %bb.28: -; RV32I-NEXT: srl a1, t2, ra -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: bnez ra, .LBB17_30 -; RV32I-NEXT: j .LBB17_31 -; RV32I-NEXT: .LBB17_29: -; RV32I-NEXT: or a1, s0, s2 -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: beqz ra, .LBB17_31 +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: .LBB17_28: +; RV32I-NEXT: lbu t2, 11(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s1, .LBB17_30 +; RV32I-NEXT: # %bb.29: +; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB17_30: -; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB17_31: -; 
RV32I-NEXT: bltu ra, t4, .LBB17_33 -; RV32I-NEXT: # %bb.32: -; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: srl a1, a5, ra -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: bnez ra, .LBB17_34 -; RV32I-NEXT: j .LBB17_35 -; RV32I-NEXT: .LBB17_33: -; RV32I-NEXT: srl a1, t2, s10 -; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sll a1, a5, s1 -; RV32I-NEXT: or a1, t5, a1 -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: beqz ra, .LBB17_35 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu t1, 9(a0) +; RV32I-NEXT: lbu a3, 10(a0) +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: beq a5, s6, .LBB17_32 +; RV32I-NEXT: # %bb.31: +; RV32I-NEXT: mv a6, t0 +; RV32I-NEXT: .LBB17_32: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t3, 8(a0) +; RV32I-NEXT: or t2, t2, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB17_34 +; RV32I-NEXT: # %bb.33: +; RV32I-NEXT: mv t0, a6 ; RV32I-NEXT: .LBB17_34: -; RV32I-NEXT: mv t5, a1 -; RV32I-NEXT: .LBB17_35: -; RV32I-NEXT: sub s3, s9, ra -; RV32I-NEXT: bltu ra, t4, .LBB17_38 -; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s3, t4, .LBB17_39 -; RV32I-NEXT: .LBB17_37: -; RV32I-NEXT: sll s1, t1, s1 -; RV32I-NEXT: neg a1, s3 -; RV32I-NEXT: srl a1, t1, a1 -; RV32I-NEXT: or a1, a1, s2 -; RV32I-NEXT: j .LBB17_40 +; RV32I-NEXT: or a3, t1, t3 +; RV32I-NEXT: slli a6, t2, 16 +; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: beqz a1, .LBB17_36 +; RV32I-NEXT: # %bb.35: +; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: .LBB17_36: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a6, a6, a3 +; RV32I-NEXT: srl a3, a7, s3 +; RV32I-NEXT: sw a3, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t6, .LBB17_38 +; RV32I-NEXT: # %bb.37: +; RV32I-NEXT: lw t0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB17_38: -; RV32I-NEXT: srl a1, a5, s10 -; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s3, t4, .LBB17_37 -; RV32I-NEXT: .LBB17_39: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sll a1, t1, s3 +; RV32I-NEXT: sll s7, a6, 
a1 +; RV32I-NEXT: beqz a5, .LBB17_42 +; RV32I-NEXT: # %bb.39: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_43 +; RV32I-NEXT: j .LBB17_44 ; RV32I-NEXT: .LBB17_40: -; RV32I-NEXT: addi s4, ra, -64 -; RV32I-NEXT: mv s2, t2 -; RV32I-NEXT: beqz s3, .LBB17_42 -; RV32I-NEXT: # %bb.41: -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: beq a5, s8, .LBB17_26 +; RV32I-NEXT: .LBB17_41: +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s10, .LBB17_27 +; RV32I-NEXT: j .LBB17_28 ; RV32I-NEXT: .LBB17_42: -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s5, a7 -; RV32I-NEXT: bltu s4, t4, .LBB17_44 -; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: srl t0, t2, s4 -; RV32I-NEXT: j .LBB17_45 +; RV32I-NEXT: or a7, s7, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_44 +; RV32I-NEXT: .LBB17_43: +; RV32I-NEXT: mv a3, s4 ; RV32I-NEXT: .LBB17_44: -; RV32I-NEXT: srl a1, t1, ra -; RV32I-NEXT: neg t0, s4 -; RV32I-NEXT: sll t0, t2, t0 -; RV32I-NEXT: or t0, a1, t0 -; RV32I-NEXT: .LBB17_45: -; RV32I-NEXT: mv s0, s10 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: lbu s8, 19(a0) -; RV32I-NEXT: lbu a1, 23(a0) -; RV32I-NEXT: mv s3, t1 -; RV32I-NEXT: beqz s4, .LBB17_47 -; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: mv s3, t0 +; RV32I-NEXT: beq a5, s5, .LBB17_61 +; RV32I-NEXT: # %bb.45: +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne a5, s0, .LBB17_62 +; RV32I-NEXT: .LBB17_46: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: bne a5, s8, .LBB17_63 ; RV32I-NEXT: .LBB17_47: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: lbu s10, 17(a0) -; RV32I-NEXT: lbu t0, 18(a0) -; RV32I-NEXT: lbu s9, 21(a0) -; RV32I-NEXT: lbu t6, 22(a0) -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: bltu s4, t4, .LBB17_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: j .LBB17_50 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s10, .LBB17_49 +; RV32I-NEXT: .LBB17_48: +; RV32I-NEXT: mv a3, 
a7 ; RV32I-NEXT: .LBB17_49: -; RV32I-NEXT: srl s4, t2, ra -; RV32I-NEXT: .LBB17_50: -; RV32I-NEXT: or s11, s8, t0 -; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu s8, 20(a0) -; RV32I-NEXT: slli s10, s10, 8 -; RV32I-NEXT: slli s9, s9, 8 -; RV32I-NEXT: or t6, a1, t6 -; RV32I-NEXT: bgeu ra, a3, .LBB17_52 -; RV32I-NEXT: # %bb.51: -; RV32I-NEXT: or s3, t5, s1 -; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s4, a1, s2 -; RV32I-NEXT: .LBB17_52: -; RV32I-NEXT: or a1, s10, t0 -; RV32I-NEXT: slli s11, s11, 16 -; RV32I-NEXT: or t0, s9, s8 -; RV32I-NEXT: slli t6, t6, 16 -; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: mv s1, a5 -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: beqz ra, .LBB17_54 -; RV32I-NEXT: # %bb.53: -; RV32I-NEXT: mv t5, s3 -; RV32I-NEXT: mv s1, s4 -; RV32I-NEXT: .LBB17_54: -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, s11, a1 -; RV32I-NEXT: or s1, t6, t0 -; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: mv a6, a7 -; RV32I-NEXT: mv a7, s0 -; RV32I-NEXT: bltu ra, a1, .LBB17_56 -; RV32I-NEXT: # %bb.55: -; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB17_56: -; RV32I-NEXT: srl s3, s2, a7 -; RV32I-NEXT: sll ra, s1, a4 -; RV32I-NEXT: mv a7, s5 -; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a4, t4, .LBB17_58 -; RV32I-NEXT: # %bb.57: -; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sll a1, s2, a4 -; RV32I-NEXT: j .LBB17_59 -; RV32I-NEXT: .LBB17_58: -; RV32I-NEXT: sll a1, s2, a4 -; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: or a1, s3, ra +; RV32I-NEXT: lbu t3, 15(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s1, .LBB17_51 +; RV32I-NEXT: # %bb.50: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB17_51: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu t1, 13(a0) +; RV32I-NEXT: lbu a3, 14(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: beq a5, s6, .LBB17_53 +; RV32I-NEXT: # %bb.52: +; RV32I-NEXT: mv a7, t0 +; 
RV32I-NEXT: .LBB17_53: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 12(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB17_55 +; RV32I-NEXT: # %bb.54: +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: .LBB17_55: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv s2, a6 +; RV32I-NEXT: beqz a1, .LBB17_57 +; RV32I-NEXT: # %bb.56: +; RV32I-NEXT: mv s2, t0 +; RV32I-NEXT: .LBB17_57: +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a7, t3, a3 +; RV32I-NEXT: srl a3, a6, s3 +; RV32I-NEXT: sw a3, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t6, .LBB17_59 +; RV32I-NEXT: # %bb.58: +; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB17_59: -; RV32I-NEXT: lbu s9, 27(a0) -; RV32I-NEXT: lbu t6, 31(a0) -; RV32I-NEXT: mv t5, s1 -; RV32I-NEXT: beqz a4, .LBB17_61 +; RV32I-NEXT: sll a3, a7, a1 +; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz a5, .LBB17_64 ; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: mv t5, a1 +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_65 +; RV32I-NEXT: j .LBB17_66 ; RV32I-NEXT: .LBB17_61: -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s4, 26(a0) -; RV32I-NEXT: lbu s11, 29(a0) -; RV32I-NEXT: lbu s10, 30(a0) -; RV32I-NEXT: slli s9, s9, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: bltu s6, t4, .LBB17_63 -; RV32I-NEXT: # %bb.62: -; RV32I-NEXT: srl t0, s1, s6 -; RV32I-NEXT: j .LBB17_64 +; RV32I-NEXT: or a7, s9, a3 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: beq a5, s0, .LBB17_46 +; RV32I-NEXT: .LBB17_62: +; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: beq a5, s8, .LBB17_47 ; RV32I-NEXT: .LBB17_63: -; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sll a1, s1, a1 -; RV32I-NEXT: or t0, s3, a1 +; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s10, .LBB17_48 +; RV32I-NEXT: j .LBB17_49 ; RV32I-NEXT: .LBB17_64: -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: lbu s3, 24(a0) -; RV32I-NEXT: lbu a1, 
28(a0) -; RV32I-NEXT: or s4, s9, s4 -; RV32I-NEXT: slli s11, s11, 8 -; RV32I-NEXT: or t6, t6, s10 -; RV32I-NEXT: mv s9, s2 -; RV32I-NEXT: beqz s6, .LBB17_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: mv s9, t0 +; RV32I-NEXT: or a6, a3, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_66 +; RV32I-NEXT: .LBB17_65: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB17_66: -; RV32I-NEXT: or a0, s8, s3 -; RV32I-NEXT: slli t0, s4, 16 -; RV32I-NEXT: or a1, s11, a1 -; RV32I-NEXT: slli t6, t6, 16 -; RV32I-NEXT: bltu s6, t4, .LBB17_68 +; RV32I-NEXT: beq a5, s5, .LBB17_84 ; RV32I-NEXT: # %bb.67: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: j .LBB17_69 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_85 ; RV32I-NEXT: .LBB17_68: -; RV32I-NEXT: srl s4, s1, s0 +; RV32I-NEXT: beq a5, s0, .LBB17_86 ; RV32I-NEXT: .LBB17_69: -; RV32I-NEXT: li s11, 64 -; RV32I-NEXT: or s6, t0, a0 -; RV32I-NEXT: or a0, t6, a1 -; RV32I-NEXT: bltu a4, t4, .LBB17_71 -; RV32I-NEXT: # %bb.70: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a1, s6, a4 -; RV32I-NEXT: mv s10, a0 -; RV32I-NEXT: bnez a4, .LBB17_72 -; RV32I-NEXT: j .LBB17_73 +; RV32I-NEXT: mv t0, t5 +; RV32I-NEXT: bne a5, s8, .LBB17_87 +; RV32I-NEXT: .LBB17_70: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beq a5, s10, .LBB17_72 ; RV32I-NEXT: .LBB17_71: -; RV32I-NEXT: sll s3, s6, a4 -; RV32I-NEXT: srl a1, s6, s0 -; RV32I-NEXT: sll t0, a0, a4 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: mv s10, a0 -; RV32I-NEXT: beqz a4, .LBB17_73 +; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB17_72: -; RV32I-NEXT: mv s10, a1 -; RV32I-NEXT: .LBB17_73: -; RV32I-NEXT: bltu s7, t4, .LBB17_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: sll a1, s2, s7 -; RV32I-NEXT: mv s0, s1 -; RV32I-NEXT: bnez s7, .LBB17_76 -; RV32I-NEXT: j .LBB17_77 -; RV32I-NEXT: .LBB17_75: -; RV32I-NEXT: sll s5, s2, a4 -; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl a1, s2, a1 -; RV32I-NEXT: or a1, a1, ra -; RV32I-NEXT: mv s0, s1 -; RV32I-NEXT: 
beqz s7, .LBB17_77 +; RV32I-NEXT: lbu t3, 19(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s1, .LBB17_74 +; RV32I-NEXT: # %bb.73: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB17_74: +; RV32I-NEXT: li a6, 0 +; RV32I-NEXT: lbu t1, 17(a0) +; RV32I-NEXT: lbu a3, 18(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: beq a5, s6, .LBB17_76 +; RV32I-NEXT: # %bb.75: +; RV32I-NEXT: mv a6, t0 ; RV32I-NEXT: .LBB17_76: -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: .LBB17_77: -; RV32I-NEXT: bltu a4, s11, .LBB17_79 -; RV32I-NEXT: # %bb.78: -; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: li t5, 0 -; RV32I-NEXT: j .LBB17_80 -; RV32I-NEXT: .LBB17_79: -; RV32I-NEXT: or s5, s9, s3 -; RV32I-NEXT: or s0, s4, s10 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 16(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB17_78 +; RV32I-NEXT: # %bb.77: +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: .LBB17_78: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv s6, a7 +; RV32I-NEXT: beqz a1, .LBB17_80 +; RV32I-NEXT: # %bb.79: +; RV32I-NEXT: mv s6, t0 ; RV32I-NEXT: .LBB17_80: -; RV32I-NEXT: addi s9, a4, -128 -; RV32I-NEXT: mv s7, s6 -; RV32I-NEXT: mv s8, a0 -; RV32I-NEXT: beqz a4, .LBB17_82 +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: or a6, t3, a3 +; RV32I-NEXT: srl s10, a7, s3 +; RV32I-NEXT: beqz t6, .LBB17_82 ; RV32I-NEXT: # %bb.81: -; RV32I-NEXT: mv s7, s5 -; RV32I-NEXT: mv s8, s0 +; RV32I-NEXT: mv t0, s10 ; RV32I-NEXT: .LBB17_82: -; RV32I-NEXT: neg s3, s9 -; RV32I-NEXT: srl s0, t3, s3 -; RV32I-NEXT: bltu s9, t4, .LBB17_84 +; RV32I-NEXT: sll s11, a6, a1 +; RV32I-NEXT: beqz a5, .LBB17_88 ; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: sll a1, t3, s9 -; RV32I-NEXT: j .LBB17_85 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_89 +; RV32I-NEXT: j .LBB17_90 ; RV32I-NEXT: .LBB17_84: -; RV32I-NEXT: sll s5, t3, a4 -; RV32I-NEXT: lw a1, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, 
s0, a1 +; RV32I-NEXT: or a6, s7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_68 ; RV32I-NEXT: .LBB17_85: -; RV32I-NEXT: sub s4, s11, s9 -; RV32I-NEXT: mv t6, a5 -; RV32I-NEXT: beqz s9, .LBB17_87 -; RV32I-NEXT: # %bb.86: -; RV32I-NEXT: mv t6, a1 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne a5, s0, .LBB17_69 +; RV32I-NEXT: .LBB17_86: +; RV32I-NEXT: or a6, s9, a3 +; RV32I-NEXT: mv t0, t5 +; RV32I-NEXT: beq a5, s8, .LBB17_70 ; RV32I-NEXT: .LBB17_87: -; RV32I-NEXT: bltu s4, t4, .LBB17_89 -; RV32I-NEXT: # %bb.88: -; RV32I-NEXT: srl a1, a5, s4 -; RV32I-NEXT: mv s0, t3 -; RV32I-NEXT: bnez s4, .LBB17_90 -; RV32I-NEXT: j .LBB17_91 +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bne a5, s10, .LBB17_71 +; RV32I-NEXT: j .LBB17_72 +; RV32I-NEXT: .LBB17_88: +; RV32I-NEXT: or a7, s11, t0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_90 ; RV32I-NEXT: .LBB17_89: -; RV32I-NEXT: neg a1, s4 -; RV32I-NEXT: sll a1, a5, a1 -; RV32I-NEXT: or a1, s0, a1 -; RV32I-NEXT: mv s0, t3 -; RV32I-NEXT: beqz s4, .LBB17_91 +; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB17_90: -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: .LBB17_91: -; RV32I-NEXT: bltu s4, t4, .LBB17_94 -; RV32I-NEXT: # %bb.92: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: li ra, 64 -; RV32I-NEXT: bgeu s9, t4, .LBB17_95 +; RV32I-NEXT: beq a5, s5, .LBB17_110 +; RV32I-NEXT: # %bb.91: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_111 +; RV32I-NEXT: .LBB17_92: +; RV32I-NEXT: beq a5, s0, .LBB17_112 ; RV32I-NEXT: .LBB17_93: -; RV32I-NEXT: sll s10, t1, a4 -; RV32I-NEXT: srl a1, t1, s3 -; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: j .LBB17_96 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_113 ; RV32I-NEXT: .LBB17_94: -; RV32I-NEXT: srl s4, a5, s3 -; RV32I-NEXT: li ra, 64 -; RV32I-NEXT: bltu s9, t4, .LBB17_93 +; RV32I-NEXT: bne a5, s8, .LBB17_96 ; RV32I-NEXT: .LBB17_95: -; RV32I-NEXT: li s10, 0 -; RV32I-NEXT: sll a1, t1, s9 +; 
RV32I-NEXT: or a7, s9, a3 ; RV32I-NEXT: .LBB17_96: -; RV32I-NEXT: addi s11, s9, -64 -; RV32I-NEXT: mv s3, t2 -; RV32I-NEXT: beqz s9, .LBB17_98 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: li t0, 4 +; RV32I-NEXT: beq a5, t0, .LBB17_98 ; RV32I-NEXT: # %bb.97: -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB17_98: -; RV32I-NEXT: bltu s11, t4, .LBB17_100 +; RV32I-NEXT: lbu t3, 23(a0) +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: beq a5, s1, .LBB17_100 ; RV32I-NEXT: # %bb.99: +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: .LBB17_100: +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu t1, 21(a0) +; RV32I-NEXT: lbu a3, 22(a0) +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: li t4, 6 +; RV32I-NEXT: beq a5, t4, .LBB17_102 +; RV32I-NEXT: # %bb.101: +; RV32I-NEXT: mv a7, t0 +; RV32I-NEXT: .LBB17_102: +; RV32I-NEXT: sw s9, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li t0, 0 +; RV32I-NEXT: lbu t4, 20(a0) +; RV32I-NEXT: or t3, t3, a3 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: beq a5, ra, .LBB17_104 +; RV32I-NEXT: # %bb.103: +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: .LBB17_104: +; RV32I-NEXT: or a3, t1, t4 +; RV32I-NEXT: slli t3, t3, 16 +; RV32I-NEXT: mv s9, a6 +; RV32I-NEXT: beqz a1, .LBB17_106 +; RV32I-NEXT: # %bb.105: +; RV32I-NEXT: mv s9, t0 +; RV32I-NEXT: .LBB17_106: +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: or t0, t3, a3 +; RV32I-NEXT: srl a6, a6, s3 +; RV32I-NEXT: beqz t6, .LBB17_108 +; RV32I-NEXT: # %bb.107: +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: .LBB17_108: +; RV32I-NEXT: sll a7, t0, a1 +; RV32I-NEXT: beqz a5, .LBB17_114 +; RV32I-NEXT: # %bb.109: +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_115 +; RV32I-NEXT: j .LBB17_116 +; RV32I-NEXT: .LBB17_110: +; RV32I-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a7, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_92 +; RV32I-NEXT: .LBB17_111: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s0, .LBB17_93 
+; RV32I-NEXT: .LBB17_112: +; RV32I-NEXT: or a7, s7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_94 +; RV32I-NEXT: .LBB17_113: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: beq a5, s8, .LBB17_95 +; RV32I-NEXT: j .LBB17_96 +; RV32I-NEXT: .LBB17_114: +; RV32I-NEXT: or t1, a7, t1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_116 +; RV32I-NEXT: .LBB17_115: +; RV32I-NEXT: mv a3, s10 +; RV32I-NEXT: .LBB17_116: +; RV32I-NEXT: beq a5, s5, .LBB17_138 +; RV32I-NEXT: # %bb.117: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_139 +; RV32I-NEXT: .LBB17_118: +; RV32I-NEXT: beq a5, s0, .LBB17_140 +; RV32I-NEXT: .LBB17_119: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_141 +; RV32I-NEXT: .LBB17_120: +; RV32I-NEXT: bne a5, s8, .LBB17_122 +; RV32I-NEXT: .LBB17_121: +; RV32I-NEXT: or t1, s7, a3 +; RV32I-NEXT: .LBB17_122: +; RV32I-NEXT: li s4, 1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_124 +; RV32I-NEXT: # %bb.123: +; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: .LBB17_124: +; RV32I-NEXT: li s5, 3 +; RV32I-NEXT: li s8, 2 +; RV32I-NEXT: li t3, 4 +; RV32I-NEXT: bne a5, t3, .LBB17_126 +; RV32I-NEXT: # %bb.125: +; RV32I-NEXT: lw t1, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t1, t1, a3 +; RV32I-NEXT: .LBB17_126: +; RV32I-NEXT: lbu s0, 27(a0) +; RV32I-NEXT: mv t3, t5 +; RV32I-NEXT: beq a5, s1, .LBB17_128 +; RV32I-NEXT: # %bb.127: +; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: .LBB17_128: +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: lbu t4, 25(a0) +; RV32I-NEXT: lbu a3, 26(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: beq a5, s1, .LBB17_130 +; RV32I-NEXT: # %bb.129: +; RV32I-NEXT: mv t1, t3 +; RV32I-NEXT: .LBB17_130: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s1, 24(a0) +; RV32I-NEXT: or s0, s0, a3 +; RV32I-NEXT: slli a3, t4, 8 +; RV32I-NEXT: beq a5, ra, .LBB17_132 +; RV32I-NEXT: # %bb.131: +; RV32I-NEXT: mv t3, t1 +; RV32I-NEXT: .LBB17_132: +; RV32I-NEXT: or a3, a3, s1 +; RV32I-NEXT: slli s0, s0, 16 +; 
RV32I-NEXT: mv ra, t0 +; RV32I-NEXT: beqz a1, .LBB17_134 +; RV32I-NEXT: # %bb.133: +; RV32I-NEXT: mv ra, t3 +; RV32I-NEXT: .LBB17_134: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: or t3, s0, a3 +; RV32I-NEXT: srl t0, t0, s3 +; RV32I-NEXT: li s0, 5 +; RV32I-NEXT: beqz t6, .LBB17_136 +; RV32I-NEXT: # %bb.135: +; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: .LBB17_136: +; RV32I-NEXT: sll t1, t3, a1 +; RV32I-NEXT: beqz a5, .LBB17_142 +; RV32I-NEXT: # %bb.137: ; RV32I-NEXT: li t4, 0 -; RV32I-NEXT: sll a1, t3, s11 -; RV32I-NEXT: bnez s11, .LBB17_101 -; RV32I-NEXT: j .LBB17_102 -; RV32I-NEXT: .LBB17_100: -; RV32I-NEXT: sll t4, t3, s9 -; RV32I-NEXT: neg a1, s11 -; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: sll t0, a5, s9 -; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: beqz s11, .LBB17_102 -; RV32I-NEXT: .LBB17_101: -; RV32I-NEXT: mv a5, a1 -; RV32I-NEXT: .LBB17_102: -; RV32I-NEXT: bltu s9, ra, .LBB17_104 -; RV32I-NEXT: # %bb.103: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li t6, 0 -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: bnez s9, .LBB17_105 -; RV32I-NEXT: j .LBB17_106 -; RV32I-NEXT: .LBB17_104: -; RV32I-NEXT: or t4, s0, s10 -; RV32I-NEXT: or a5, s4, s3 -; RV32I-NEXT: li a1, 128 -; RV32I-NEXT: beqz s9, .LBB17_106 -; RV32I-NEXT: .LBB17_105: -; RV32I-NEXT: mv t1, t4 -; RV32I-NEXT: mv t2, a5 -; RV32I-NEXT: .LBB17_106: -; RV32I-NEXT: bltu a4, a1, .LBB17_108 -; RV32I-NEXT: # %bb.107: -; RV32I-NEXT: li ra, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: bnez a4, .LBB17_109 -; RV32I-NEXT: j .LBB17_110 -; RV32I-NEXT: .LBB17_108: -; RV32I-NEXT: lw a1, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a5, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s5, a1, a5 -; RV32I-NEXT: lw a1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t6, a1, t5 -; RV32I-NEXT: lw a1, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t1, a1, s7 -; RV32I-NEXT: lw a1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t2, a1, s8 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: 
beqz a4, .LBB17_110 -; RV32I-NEXT: .LBB17_109: -; RV32I-NEXT: mv s2, s5 -; RV32I-NEXT: mv s1, t6 -; RV32I-NEXT: mv s6, t1 -; RV32I-NEXT: mv a0, t2 -; RV32I-NEXT: .LBB17_110: -; RV32I-NEXT: srli a4, ra, 16 -; RV32I-NEXT: lui t2, 16 -; RV32I-NEXT: srli t1, ra, 24 -; RV32I-NEXT: srli a5, a3, 16 -; RV32I-NEXT: srli t4, a3, 24 -; RV32I-NEXT: srli t0, a7, 16 -; RV32I-NEXT: srli s0, a7, 24 -; RV32I-NEXT: srli t3, a6, 16 -; RV32I-NEXT: srli s3, a6, 24 -; RV32I-NEXT: srli t6, s2, 16 -; RV32I-NEXT: srli a1, s2, 24 -; RV32I-NEXT: srli t5, s1, 16 -; RV32I-NEXT: srli s5, s1, 24 -; RV32I-NEXT: srli s4, s6, 16 -; RV32I-NEXT: srli s7, s6, 24 -; RV32I-NEXT: srli s8, a0, 16 -; RV32I-NEXT: srli s9, a0, 24 -; RV32I-NEXT: addi t2, t2, -1 -; RV32I-NEXT: and s10, ra, t2 -; RV32I-NEXT: and s11, a3, t2 +; RV32I-NEXT: bnez t6, .LBB17_143 +; RV32I-NEXT: j .LBB17_144 +; RV32I-NEXT: .LBB17_138: +; RV32I-NEXT: or t1, s11, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_118 +; RV32I-NEXT: .LBB17_139: +; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s0, .LBB17_119 +; RV32I-NEXT: .LBB17_140: +; RV32I-NEXT: lw t1, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t1, t1, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_120 +; RV32I-NEXT: .LBB17_141: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: beq a5, s8, .LBB17_121 +; RV32I-NEXT: j .LBB17_122 +; RV32I-NEXT: .LBB17_142: +; RV32I-NEXT: or t4, t1, t4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_144 +; RV32I-NEXT: .LBB17_143: +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: .LBB17_144: +; RV32I-NEXT: beq a5, s4, .LBB17_164 +; RV32I-NEXT: # %bb.145: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_165 +; RV32I-NEXT: .LBB17_146: +; RV32I-NEXT: beq a5, s8, .LBB17_166 +; RV32I-NEXT: .LBB17_147: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_167 +; RV32I-NEXT: .LBB17_148: +; RV32I-NEXT: beq a5, s5, .LBB17_168 +; RV32I-NEXT: .LBB17_149: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: 
bnez t6, .LBB17_169 +; RV32I-NEXT: .LBB17_150: +; RV32I-NEXT: li s1, 4 +; RV32I-NEXT: beq a5, s1, .LBB17_170 +; RV32I-NEXT: .LBB17_151: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_171 +; RV32I-NEXT: .LBB17_152: +; RV32I-NEXT: bne a5, s0, .LBB17_154 +; RV32I-NEXT: .LBB17_153: +; RV32I-NEXT: lw t4, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t4, t4, a3 +; RV32I-NEXT: .LBB17_154: +; RV32I-NEXT: lbu s0, 31(a0) +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: beq a5, s1, .LBB17_156 +; RV32I-NEXT: # %bb.155: +; RV32I-NEXT: mv a3, t4 +; RV32I-NEXT: .LBB17_156: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: lbu s5, 29(a0) +; RV32I-NEXT: lbu s1, 30(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: li s4, 7 +; RV32I-NEXT: beq a5, s4, .LBB17_158 +; RV32I-NEXT: # %bb.157: +; RV32I-NEXT: mv t4, a3 +; RV32I-NEXT: .LBB17_158: +; RV32I-NEXT: lbu a3, 28(a0) +; RV32I-NEXT: slli s5, s5, 8 +; RV32I-NEXT: or s0, s0, s1 +; RV32I-NEXT: mv a0, t3 +; RV32I-NEXT: beqz a1, .LBB17_160 +; RV32I-NEXT: # %bb.159: +; RV32I-NEXT: mv a0, t4 +; RV32I-NEXT: .LBB17_160: +; RV32I-NEXT: li t4, 0 +; RV32I-NEXT: or a3, s5, a3 +; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: li s1, 5 +; RV32I-NEXT: li s4, 4 +; RV32I-NEXT: beqz t6, .LBB17_162 +; RV32I-NEXT: # %bb.161: +; RV32I-NEXT: srl t4, t3, s3 +; RV32I-NEXT: .LBB17_162: +; RV32I-NEXT: or s3, s0, a3 +; RV32I-NEXT: li s0, 6 +; RV32I-NEXT: li s5, 1 +; RV32I-NEXT: beqz a5, .LBB17_172 +; RV32I-NEXT: # %bb.163: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_173 +; RV32I-NEXT: j .LBB17_174 +; RV32I-NEXT: .LBB17_164: +; RV32I-NEXT: or t4, a7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_146 +; RV32I-NEXT: .LBB17_165: +; RV32I-NEXT: mv a3, s10 +; RV32I-NEXT: bne a5, s8, .LBB17_147 +; RV32I-NEXT: .LBB17_166: +; RV32I-NEXT: or t4, s11, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_148 +; RV32I-NEXT: .LBB17_167: +; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s5, 
.LBB17_149 +; RV32I-NEXT: .LBB17_168: +; RV32I-NEXT: lw t4, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t4, t4, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_150 +; RV32I-NEXT: .LBB17_169: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: li s1, 4 +; RV32I-NEXT: bne a5, s1, .LBB17_151 +; RV32I-NEXT: .LBB17_170: +; RV32I-NEXT: or t4, s7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_152 +; RV32I-NEXT: .LBB17_171: +; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: beq a5, s0, .LBB17_153 +; RV32I-NEXT: j .LBB17_154 +; RV32I-NEXT: .LBB17_172: +; RV32I-NEXT: sll a3, s3, a1 +; RV32I-NEXT: or t3, a3, t4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_174 +; RV32I-NEXT: .LBB17_173: +; RV32I-NEXT: mv a3, t0 +; RV32I-NEXT: .LBB17_174: +; RV32I-NEXT: beq a5, s5, .LBB17_190 +; RV32I-NEXT: # %bb.175: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_191 +; RV32I-NEXT: .LBB17_176: +; RV32I-NEXT: bne a5, s8, .LBB17_178 +; RV32I-NEXT: .LBB17_177: +; RV32I-NEXT: or t3, a7, a3 +; RV32I-NEXT: .LBB17_178: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a6, 3 +; RV32I-NEXT: bnez t6, .LBB17_192 +; RV32I-NEXT: # %bb.179: +; RV32I-NEXT: beq a5, a6, .LBB17_193 +; RV32I-NEXT: .LBB17_180: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_194 +; RV32I-NEXT: .LBB17_181: +; RV32I-NEXT: beq a5, s4, .LBB17_195 +; RV32I-NEXT: .LBB17_182: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_196 +; RV32I-NEXT: .LBB17_183: +; RV32I-NEXT: beq a5, s1, .LBB17_197 +; RV32I-NEXT: .LBB17_184: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t6, .LBB17_198 +; RV32I-NEXT: .LBB17_185: +; RV32I-NEXT: beq a5, s0, .LBB17_199 +; RV32I-NEXT: .LBB17_186: +; RV32I-NEXT: li a3, 7 +; RV32I-NEXT: bne a5, a3, .LBB17_200 +; RV32I-NEXT: .LBB17_187: +; RV32I-NEXT: beqz a1, .LBB17_189 +; RV32I-NEXT: .LBB17_188: +; RV32I-NEXT: mv s3, t5 +; RV32I-NEXT: .LBB17_189: +; RV32I-NEXT: srli a1, a4, 16 +; RV32I-NEXT: lui a7, 16 +; RV32I-NEXT: srli a6, a4, 24 +; 
RV32I-NEXT: srli a3, t2, 16 +; RV32I-NEXT: srli t1, t2, 24 +; RV32I-NEXT: srli a5, s2, 16 +; RV32I-NEXT: srli t5, s2, 24 +; RV32I-NEXT: srli t0, s6, 16 +; RV32I-NEXT: srli t6, s6, 24 +; RV32I-NEXT: srli t4, s9, 16 +; RV32I-NEXT: srli s4, s9, 24 +; RV32I-NEXT: srli t3, ra, 16 +; RV32I-NEXT: srli s1, ra, 24 +; RV32I-NEXT: srli s0, a0, 16 +; RV32I-NEXT: srli s5, a0, 24 +; RV32I-NEXT: srli s7, s3, 16 +; RV32I-NEXT: srli s8, s3, 24 +; RV32I-NEXT: addi a7, a7, -1 +; RV32I-NEXT: and s10, a4, a7 +; RV32I-NEXT: and s11, t2, a7 ; RV32I-NEXT: srli s10, s10, 8 -; RV32I-NEXT: sb ra, 0(a2) +; RV32I-NEXT: sb a4, 0(a2) ; RV32I-NEXT: sb s10, 1(a2) -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb t1, 3(a2) -; RV32I-NEXT: and a4, a7, t2 -; RV32I-NEXT: srli t1, s11, 8 -; RV32I-NEXT: sb a3, 4(a2) -; RV32I-NEXT: sb t1, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t4, 7(a2) -; RV32I-NEXT: and a3, a6, t2 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 8(a2) -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t0, 10(a2) -; RV32I-NEXT: sb s0, 11(a2) -; RV32I-NEXT: and a4, s2, t2 +; RV32I-NEXT: sb a1, 2(a2) +; RV32I-NEXT: sb a6, 3(a2) +; RV32I-NEXT: and a1, s2, a7 +; RV32I-NEXT: srli a4, s11, 8 +; RV32I-NEXT: sb t2, 4(a2) +; RV32I-NEXT: sb a4, 5(a2) +; RV32I-NEXT: sb a3, 6(a2) +; RV32I-NEXT: sb t1, 7(a2) +; RV32I-NEXT: and a3, s6, a7 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb s2, 8(a2) +; RV32I-NEXT: sb a1, 9(a2) +; RV32I-NEXT: sb a5, 10(a2) +; RV32I-NEXT: sb t5, 11(a2) +; RV32I-NEXT: and a1, s9, a7 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb s6, 12(a2) ; RV32I-NEXT: sb a3, 13(a2) -; RV32I-NEXT: sb t3, 14(a2) -; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a3, s1, t2 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s2, 16(a2) -; RV32I-NEXT: sb a4, 17(a2) -; RV32I-NEXT: sb t6, 18(a2) -; RV32I-NEXT: sb a1, 19(a2) -; RV32I-NEXT: and a1, s6, t2 -; RV32I-NEXT: and a4, a0, t2 +; RV32I-NEXT: sb t0, 14(a2) +; RV32I-NEXT: sb t6, 15(a2) +; RV32I-NEXT: and a3, ra, 
a7 +; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: sb s9, 16(a2) +; RV32I-NEXT: sb a1, 17(a2) +; RV32I-NEXT: sb t4, 18(a2) +; RV32I-NEXT: sb s4, 19(a2) +; RV32I-NEXT: and a1, a0, a7 +; RV32I-NEXT: and a4, s3, a7 ; RV32I-NEXT: srli a3, a3, 8 ; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb s1, 20(a2) +; RV32I-NEXT: sb ra, 20(a2) ; RV32I-NEXT: sb a3, 21(a2) -; RV32I-NEXT: sb t5, 22(a2) -; RV32I-NEXT: sb s5, 23(a2) -; RV32I-NEXT: sb s6, 24(a2) +; RV32I-NEXT: sb t3, 22(a2) +; RV32I-NEXT: sb s1, 23(a2) +; RV32I-NEXT: sb a0, 24(a2) ; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s4, 26(a2) -; RV32I-NEXT: sb s7, 27(a2) -; RV32I-NEXT: sb a0, 28(a2) +; RV32I-NEXT: sb s0, 26(a2) +; RV32I-NEXT: sb s5, 27(a2) +; RV32I-NEXT: sb s3, 28(a2) ; RV32I-NEXT: sb a4, 29(a2) -; RV32I-NEXT: sb s8, 30(a2) -; RV32I-NEXT: sb s9, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: sb s7, 30(a2) +; RV32I-NEXT: sb s8, 31(a2) +; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 
48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret +; RV32I-NEXT: .LBB17_190: +; RV32I-NEXT: or t3, t1, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_176 +; RV32I-NEXT: .LBB17_191: +; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: beq a5, s8, .LBB17_177 +; RV32I-NEXT: j .LBB17_178 +; RV32I-NEXT: .LBB17_192: +; RV32I-NEXT: mv a3, s10 +; RV32I-NEXT: bne a5, a6, .LBB17_180 +; RV32I-NEXT: .LBB17_193: +; RV32I-NEXT: or t3, s11, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_181 +; RV32I-NEXT: .LBB17_194: +; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s4, .LBB17_182 +; RV32I-NEXT: .LBB17_195: +; RV32I-NEXT: lw a6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_183 +; RV32I-NEXT: .LBB17_196: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s1, .LBB17_184 +; RV32I-NEXT: .LBB17_197: +; RV32I-NEXT: or t3, s7, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t6, .LBB17_185 +; RV32I-NEXT: .LBB17_198: +; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne a5, s0, .LBB17_186 +; RV32I-NEXT: .LBB17_199: +; RV32I-NEXT: lw a6, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: or t3, a6, a3 +; RV32I-NEXT: li a3, 7 +; RV32I-NEXT: beq a5, a3, .LBB17_187 +; RV32I-NEXT: .LBB17_200: +; RV32I-NEXT: mv t5, t3 +; RV32I-NEXT: bnez a1, .LBB17_188 +; RV32I-NEXT: j .LBB17_189 %src = load i256, ptr %src.ptr, align 1 %dwordOff = load i256, ptr %dwordOff.ptr, align 1 %bitOff = shl i256 %dwordOff, 6 @@ -8500,617 +9800,893 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: ashr_32bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, 
sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 16(a0) -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a5, 18(a0) -; RV32I-NEXT: lbu a6, 19(a0) -; RV32I-NEXT: lbu a7, 20(a0) -; RV32I-NEXT: lbu t0, 21(a0) -; RV32I-NEXT: lbu t1, 22(a0) -; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu t0, 2(a0) +; RV32I-NEXT: lbu t1, 3(a0) +; RV32I-NEXT: lbu t2, 4(a0) +; RV32I-NEXT: lbu t3, 5(a0) +; RV32I-NEXT: lbu t4, 6(a0) +; RV32I-NEXT: lbu t5, 7(a0) +; 
RV32I-NEXT: lbu t6, 8(a0) +; RV32I-NEXT: lbu s0, 9(a0) +; RV32I-NEXT: lbu s1, 10(a0) +; RV32I-NEXT: lbu s2, 11(a0) ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t3, t4, t3 -; RV32I-NEXT: or t4, t6, t5 -; RV32I-NEXT: or t0, t0, a7 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t5, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: or a4, a4, a3 +; RV32I-NEXT: lbu a7, 13(a0) +; RV32I-NEXT: lbu a6, 14(a0) +; RV32I-NEXT: lbu a3, 15(a0) +; RV32I-NEXT: lbu s3, 28(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 30(a0) +; RV32I-NEXT: lbu t5, 31(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: slli s4, t2, 8 +; RV32I-NEXT: or t2, s0, t6 +; RV32I-NEXT: or s0, s2, s1 +; RV32I-NEXT: or s1, s4, s3 +; RV32I-NEXT: lbu t6, 0(a1) +; RV32I-NEXT: lbu s2, 1(a1) +; RV32I-NEXT: lbu s3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s0, t5, a7 +; RV32I-NEXT: or s4, t5, t3 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or s2, s2, t6 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 -; RV32I-NEXT: li t5, 32 -; RV32I-NEXT: slli a7, a4, 16 -; RV32I-NEXT: slli a1, a5, 16 +; RV32I-NEXT: or a1, a1, s3 +; RV32I-NEXT: slli t3, a3, 8 +; RV32I-NEXT: slli t6, t0, 16 ; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: slli a5, t2, 16 -; RV32I-NEXT: or t2, t4, 
t3 -; RV32I-NEXT: or a4, t1, t0 -; RV32I-NEXT: or a5, a5, s0 -; RV32I-NEXT: slli a5, a5, 3 -; RV32I-NEXT: srl s0, t2, a5 -; RV32I-NEXT: neg s6, a5 -; RV32I-NEXT: sll s1, a4, s6 -; RV32I-NEXT: bltu a5, t5, .LBB18_2 +; RV32I-NEXT: slli t5, s0, 16 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: slli a3, a1, 16 +; RV32I-NEXT: or s5, t4, t1 +; RV32I-NEXT: or a1, s4, s1 +; RV32I-NEXT: or t0, a3, s2 +; RV32I-NEXT: slli t0, t0, 3 +; RV32I-NEXT: srli t1, t0, 5 +; RV32I-NEXT: andi t4, t0, 31 +; RV32I-NEXT: neg a3, t4 +; RV32I-NEXT: beqz t4, .LBB18_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra t0, a4, a5 -; RV32I-NEXT: j .LBB18_3 +; RV32I-NEXT: sll a5, s5, a3 ; RV32I-NEXT: .LBB18_2: -; RV32I-NEXT: or t0, s0, s1 -; RV32I-NEXT: .LBB18_3: -; RV32I-NEXT: or t1, a7, a3 -; RV32I-NEXT: or a7, a1, a6 -; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beqz a5, .LBB18_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv t3, t0 +; RV32I-NEXT: or s10, t6, a4 +; RV32I-NEXT: lbu t6, 12(a0) +; RV32I-NEXT: lbu s0, 19(a0) +; RV32I-NEXT: slli s1, a7, 8 +; RV32I-NEXT: or a6, t3, a6 +; RV32I-NEXT: or a4, t5, t2 +; RV32I-NEXT: srai t2, a1, 31 +; RV32I-NEXT: beqz t1, .LBB18_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB18_5 +; RV32I-NEXT: .LBB18_4: +; RV32I-NEXT: srl a7, s10, t0 +; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: .LBB18_5: -; RV32I-NEXT: srl a3, t1, a5 -; RV32I-NEXT: sll a1, a7, s6 -; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a5, t5, .LBB18_7 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu s3, 17(a0) +; RV32I-NEXT: lbu t3, 18(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or s4, s1, t6 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: li s6, 1 +; RV32I-NEXT: sll s2, a4, a3 +; RV32I-NEXT: beqz t4, .LBB18_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srai t4, a4, 31 -; RV32I-NEXT: srl a1, a7, a5 -; RV32I-NEXT: j .LBB18_8 +; RV32I-NEXT: mv a7, s2 ; RV32I-NEXT: .LBB18_7: -; RV32I-NEXT: sra t4, a4, a5 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: .LBB18_8: -; RV32I-NEXT: li t6, 64 
-; RV32I-NEXT: mv t0, t1 -; RV32I-NEXT: beqz a5, .LBB18_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t0, a1 -; RV32I-NEXT: .LBB18_10: -; RV32I-NEXT: sub s7, t6, a5 -; RV32I-NEXT: bltu a5, t5, .LBB18_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: j .LBB18_13 -; RV32I-NEXT: .LBB18_12: -; RV32I-NEXT: srl a1, a7, a5 +; RV32I-NEXT: lbu t5, 16(a0) +; RV32I-NEXT: lbu t6, 23(a0) +; RV32I-NEXT: slli s1, s3, 8 +; RV32I-NEXT: or s0, s0, t3 +; RV32I-NEXT: srl s3, s5, t0 +; RV32I-NEXT: or a6, a6, s4 +; RV32I-NEXT: bne t1, s6, .LBB18_9 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: or a5, s3, a7 +; RV32I-NEXT: .LBB18_9: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s6, 21(a0) +; RV32I-NEXT: lbu a7, 22(a0) +; RV32I-NEXT: slli s4, t6, 8 +; RV32I-NEXT: or s7, s1, t5 +; RV32I-NEXT: slli s8, s0, 16 +; RV32I-NEXT: li s9, 2 +; RV32I-NEXT: sll s0, a6, a3 +; RV32I-NEXT: beqz t4, .LBB18_11 +; RV32I-NEXT: # %bb.10: +; RV32I-NEXT: mv t3, s0 +; RV32I-NEXT: .LBB18_11: +; RV32I-NEXT: lbu t5, 20(a0) +; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: slli s6, s6, 8 +; RV32I-NEXT: or s4, s4, a7 +; RV32I-NEXT: srl s1, a4, t0 +; RV32I-NEXT: or a7, s8, s7 +; RV32I-NEXT: bne t1, s9, .LBB18_13 +; RV32I-NEXT: # %bb.12: +; RV32I-NEXT: or a5, s1, t3 ; RV32I-NEXT: .LBB18_13: -; RV32I-NEXT: sw a3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: neg s10, s7 -; RV32I-NEXT: bltu s7, t5, .LBB18_15 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s8, 25(a0) +; RV32I-NEXT: lbu s7, 26(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: or s6, s6, t5 +; RV32I-NEXT: slli s9, s4, 16 +; RV32I-NEXT: li s11, 3 +; RV32I-NEXT: sll t5, a7, a3 +; RV32I-NEXT: beqz t4, .LBB18_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: sll a3, t2, s7 -; RV32I-NEXT: j .LBB18_16 +; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB18_15: -; RV32I-NEXT: sll a6, t2, s6 -; RV32I-NEXT: srl a3, t2, s10 -; RV32I-NEXT: or a3, a3, s1 -; RV32I-NEXT: .LBB18_16: -; RV32I-NEXT: sw t3, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi s9, a5, -64 -; 
RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz s7, .LBB18_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv t3, a3 -; RV32I-NEXT: .LBB18_18: -; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t5, .LBB18_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: sra s0, a4, s9 -; RV32I-NEXT: j .LBB18_21 -; RV32I-NEXT: .LBB18_20: -; RV32I-NEXT: sll a3, a4, s11 -; RV32I-NEXT: or s0, s0, a3 +; RV32I-NEXT: lbu s4, 24(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or s7, t6, s7 +; RV32I-NEXT: srl t6, a6, t0 +; RV32I-NEXT: or a0, s9, s6 +; RV32I-NEXT: sw s5, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: bne t1, s11, .LBB18_17 +; RV32I-NEXT: # %bb.16: +; RV32I-NEXT: or a5, t6, t3 +; RV32I-NEXT: .LBB18_17: +; RV32I-NEXT: li s6, 0 +; RV32I-NEXT: or t3, s8, s4 +; RV32I-NEXT: slli s7, s7, 16 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: sll s11, a0, a3 +; RV32I-NEXT: beqz t4, .LBB18_19 +; RV32I-NEXT: # %bb.18: +; RV32I-NEXT: mv s6, s11 +; RV32I-NEXT: .LBB18_19: +; RV32I-NEXT: srl s4, a7, t0 +; RV32I-NEXT: or t3, s7, t3 +; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: bne t1, s10, .LBB18_21 +; RV32I-NEXT: # %bb.20: +; RV32I-NEXT: or a5, s4, s6 ; RV32I-NEXT: .LBB18_21: -; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s3, 11(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: mv t4, t2 -; RV32I-NEXT: beqz s9, .LBB18_23 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s5, 5 +; RV32I-NEXT: sll s6, t3, a3 +; RV32I-NEXT: sw s6, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t4, .LBB18_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv t4, s0 +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB18_23: -; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) -; RV32I-NEXT: lbu s8, 13(a0) -; RV32I-NEXT: lbu ra, 14(a0) -; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: bltu s9, t5, .LBB18_25 +; 
RV32I-NEXT: srl s6, a0, t0 +; RV32I-NEXT: beq t1, s5, .LBB18_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: srai s0, a4, 31 +; RV32I-NEXT: mv ra, s6 ; RV32I-NEXT: j .LBB18_26 ; RV32I-NEXT: .LBB18_25: -; RV32I-NEXT: sra s0, a4, a5 +; RV32I-NEXT: mv ra, s6 +; RV32I-NEXT: or a5, s6, s4 ; RV32I-NEXT: .LBB18_26: -; RV32I-NEXT: or s1, s3, s1 -; RV32I-NEXT: lbu s5, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, s8, 8 -; RV32I-NEXT: or s8, a3, ra -; RV32I-NEXT: bgeu a5, t6, .LBB18_28 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: sll s7, a1, a3 +; RV32I-NEXT: beqz t4, .LBB18_28 ; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: or t4, t0, a6 -; RV32I-NEXT: or s0, a1, t3 +; RV32I-NEXT: mv s4, s7 ; RV32I-NEXT: .LBB18_28: -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: or a6, s2, s5 -; RV32I-NEXT: slli s2, s1, 16 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: mv a1, t1 -; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: beqz a5, .LBB18_30 +; RV32I-NEXT: srl s5, t3, t0 +; RV32I-NEXT: beq t1, s8, .LBB18_30 ; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv a1, t4 -; RV32I-NEXT: mv t0, s0 +; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: j .LBB18_31 ; RV32I-NEXT: .LBB18_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; RV32I-NEXT: lbu a3, 2(a0) -; RV32I-NEXT: lbu s3, 5(a0) -; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s4, t3, 8 -; RV32I-NEXT: or t4, s2, a6 -; RV32I-NEXT: or t3, s8, s1 -; RV32I-NEXT: bltu a5, t6, .LBB18_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: srai a6, a4, 31 -; RV32I-NEXT: sw a6, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a6, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB18_32: -; RV32I-NEXT: slli a6, ra, 8 -; RV32I-NEXT: or a3, s5, a3 -; RV32I-NEXT: lbu s1, 0(a0) -; RV32I-NEXT: lbu a0, 4(a0) -; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: or s0, s4, s0 -; RV32I-NEXT: srl s2, t4, a5 -; RV32I-NEXT: sll ra, t3, s6 -; RV32I-NEXT: bltu a5, t5, .LBB18_34 -; RV32I-NEXT: # %bb.33: 
-; RV32I-NEXT: srl s4, t3, a5 -; RV32I-NEXT: j .LBB18_35 -; RV32I-NEXT: .LBB18_34: -; RV32I-NEXT: or s4, s2, ra +; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: or a5, s5, s4 +; RV32I-NEXT: .LBB18_31: +; RV32I-NEXT: li s5, 0 +; RV32I-NEXT: li s6, 7 +; RV32I-NEXT: sll s4, t2, a3 +; RV32I-NEXT: beqz t4, .LBB18_33 +; RV32I-NEXT: # %bb.32: +; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: .LBB18_33: +; RV32I-NEXT: srl a3, a1, t0 +; RV32I-NEXT: bne t1, s6, .LBB18_35 +; RV32I-NEXT: # %bb.34: +; RV32I-NEXT: or a5, a3, s5 ; RV32I-NEXT: .LBB18_35: -; RV32I-NEXT: or a6, a6, s1 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: or a0, s3, a0 -; RV32I-NEXT: slli s1, s0, 16 -; RV32I-NEXT: mv s5, t4 -; RV32I-NEXT: beqz a5, .LBB18_37 +; RV32I-NEXT: li s5, 3 +; RV32I-NEXT: mv s6, a3 +; RV32I-NEXT: bnez t0, .LBB18_39 ; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_40 ; RV32I-NEXT: .LBB18_37: -; RV32I-NEXT: or s0, a3, a6 -; RV32I-NEXT: or a0, s1, a0 -; RV32I-NEXT: bltu a5, t5, .LBB18_39 -; RV32I-NEXT: # %bb.38: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: srl a3, a0, a5 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: bnez a5, .LBB18_40 -; RV32I-NEXT: j .LBB18_41 +; RV32I-NEXT: beqz t1, .LBB18_41 +; RV32I-NEXT: .LBB18_38: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB18_42 ; RV32I-NEXT: .LBB18_39: -; RV32I-NEXT: srl s4, t3, a5 -; RV32I-NEXT: srl a3, s0, a5 -; RV32I-NEXT: sll a6, a0, s6 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: beqz a5, .LBB18_41 +; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_37 ; RV32I-NEXT: .LBB18_40: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: bnez t1, .LBB18_38 ; RV32I-NEXT: .LBB18_41: -; RV32I-NEXT: bltu a5, t5, .LBB18_44 -; RV32I-NEXT: # %bb.42: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: bgeu s7, t5, .LBB18_45 -; RV32I-NEXT: .LBB18_43: -; RV32I-NEXT: sll s3, t4, s6 -; RV32I-NEXT: srl a3, t4, s10 -; RV32I-NEXT: or a3, a3, ra -; RV32I-NEXT: mv 
s10, t3 -; RV32I-NEXT: bnez s7, .LBB18_46 -; RV32I-NEXT: j .LBB18_47 +; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: .LBB18_42: +; RV32I-NEXT: li s2, 1 +; RV32I-NEXT: li s3, 2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_61 +; RV32I-NEXT: # %bb.43: +; RV32I-NEXT: beq t1, s2, .LBB18_62 ; RV32I-NEXT: .LBB18_44: -; RV32I-NEXT: srl s1, a0, a5 -; RV32I-NEXT: bltu s7, t5, .LBB18_43 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_63 ; RV32I-NEXT: .LBB18_45: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t4, s7 -; RV32I-NEXT: mv s10, t3 -; RV32I-NEXT: beqz s7, .LBB18_47 +; RV32I-NEXT: beq t1, s3, .LBB18_64 ; RV32I-NEXT: .LBB18_46: -; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_65 ; RV32I-NEXT: .LBB18_47: -; RV32I-NEXT: bltu s9, t5, .LBB18_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: srl a3, t3, s9 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: bnez s9, .LBB18_50 -; RV32I-NEXT: j .LBB18_51 +; RV32I-NEXT: beq t1, s5, .LBB18_66 +; RV32I-NEXT: .LBB18_48: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_67 ; RV32I-NEXT: .LBB18_49: -; RV32I-NEXT: sll a3, t3, s11 -; RV32I-NEXT: or a3, s2, a3 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: beqz s9, .LBB18_51 +; RV32I-NEXT: bne t1, s10, .LBB18_51 ; RV32I-NEXT: .LBB18_50: -; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: or a5, ra, a3 ; RV32I-NEXT: .LBB18_51: -; RV32I-NEXT: bltu s9, t5, .LBB18_53 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li s10, 5 +; RV32I-NEXT: bnez t4, .LBB18_68 ; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bltu a5, t6, .LBB18_54 -; RV32I-NEXT: j .LBB18_55 +; RV32I-NEXT: beq t1, s10, .LBB18_69 ; RV32I-NEXT: .LBB18_53: -; RV32I-NEXT: srl s7, t3, a5 -; RV32I-NEXT: bgeu a5, t6, .LBB18_55 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_70 ; RV32I-NEXT: .LBB18_54: -; RV32I-NEXT: or s2, a6, s3 -; RV32I-NEXT: or s7, s1, s10 +; RV32I-NEXT: bne t1, s8, .LBB18_56 ; RV32I-NEXT: .LBB18_55: -; RV32I-NEXT: li a3, 128 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: 
beqz a5, .LBB18_57 -; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv a6, s2 -; RV32I-NEXT: mv s1, s7 -; RV32I-NEXT: .LBB18_57: -; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s2, a3, a5 -; RV32I-NEXT: bltu a5, t6, .LBB18_59 -; RV32I-NEXT: # %bb.58: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: .LBB18_56: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: li s8, 7 +; RV32I-NEXT: bne t1, s8, .LBB18_71 +; RV32I-NEXT: # %bb.57: +; RV32I-NEXT: bnez t0, .LBB18_72 +; RV32I-NEXT: .LBB18_58: +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: bnez t4, .LBB18_73 ; RV32I-NEXT: .LBB18_59: -; RV32I-NEXT: neg s3, s2 -; RV32I-NEXT: srl a6, t1, s3 -; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t5, .LBB18_61 -; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: li s11, 0 -; RV32I-NEXT: sll a3, t1, s2 -; RV32I-NEXT: j .LBB18_62 +; RV32I-NEXT: beqz t1, .LBB18_74 +; RV32I-NEXT: .LBB18_60: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB18_75 ; RV32I-NEXT: .LBB18_61: -; RV32I-NEXT: sll s11, t1, s6 -; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: bne t1, s2, .LBB18_44 ; RV32I-NEXT: .LBB18_62: -; RV32I-NEXT: sub s1, t6, s2 -; RV32I-NEXT: mv s8, a7 -; RV32I-NEXT: beqz s2, .LBB18_64 -; RV32I-NEXT: # %bb.63: -; RV32I-NEXT: mv s8, a3 +; RV32I-NEXT: or a5, s1, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_45 +; RV32I-NEXT: .LBB18_63: +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne t1, s3, .LBB18_46 ; RV32I-NEXT: .LBB18_64: -; RV32I-NEXT: bltu s1, t5, .LBB18_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: srl a3, a7, s1 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: bnez s1, .LBB18_67 -; RV32I-NEXT: j .LBB18_68 +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_47 +; RV32I-NEXT: .LBB18_65: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne t1, s5, .LBB18_48 ; RV32I-NEXT: .LBB18_66: -; RV32I-NEXT: neg a3, 
s1 -; RV32I-NEXT: sll a3, a7, a3 -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: beqz s1, .LBB18_68 +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_49 ; RV32I-NEXT: .LBB18_67: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: beq t1, s10, .LBB18_50 +; RV32I-NEXT: j .LBB18_51 ; RV32I-NEXT: .LBB18_68: -; RV32I-NEXT: bltu s1, t5, .LBB18_71 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s2, t5, .LBB18_72 +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s10, .LBB18_53 +; RV32I-NEXT: .LBB18_69: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_54 ; RV32I-NEXT: .LBB18_70: -; RV32I-NEXT: sll s6, t2, s6 -; RV32I-NEXT: srl a3, t2, s3 -; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB18_73 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: beq t1, s8, .LBB18_55 +; RV32I-NEXT: j .LBB18_56 ; RV32I-NEXT: .LBB18_71: -; RV32I-NEXT: srl s1, a7, s3 -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t5, .LBB18_70 +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB18_58 ; RV32I-NEXT: .LBB18_72: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: sll a3, t2, s2 +; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: beqz t4, .LBB18_59 ; RV32I-NEXT: .LBB18_73: -; RV32I-NEXT: addi s9, s2, -64 -; RV32I-NEXT: mv s5, a4 -; RV32I-NEXT: beqz s2, .LBB18_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: mv a5, s0 +; RV32I-NEXT: bnez t1, .LBB18_60 +; RV32I-NEXT: .LBB18_74: +; RV32I-NEXT: or a5, s1, a5 ; RV32I-NEXT: .LBB18_75: -; RV32I-NEXT: bltu s9, t5, .LBB18_77 +; RV32I-NEXT: li s0, 4 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_91 ; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, 
t1, s9 -; RV32I-NEXT: mv s7, a7 -; RV32I-NEXT: bnez s9, .LBB18_78 -; RV32I-NEXT: j .LBB18_79 +; RV32I-NEXT: beq t1, s2, .LBB18_92 ; RV32I-NEXT: .LBB18_77: -; RV32I-NEXT: sll s3, t1, s2 -; RV32I-NEXT: neg a3, s9 -; RV32I-NEXT: srl a3, t1, a3 -; RV32I-NEXT: sll s4, a7, s2 -; RV32I-NEXT: or a3, a3, s4 -; RV32I-NEXT: mv s7, a7 -; RV32I-NEXT: beqz s9, .LBB18_79 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_93 ; RV32I-NEXT: .LBB18_78: -; RV32I-NEXT: mv s7, a3 +; RV32I-NEXT: beq t1, s3, .LBB18_94 ; RV32I-NEXT: .LBB18_79: -; RV32I-NEXT: bltu s2, t6, .LBB18_81 -; RV32I-NEXT: # %bb.80: -; RV32I-NEXT: li s11, 0 -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: j .LBB18_82 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_95 +; RV32I-NEXT: .LBB18_80: +; RV32I-NEXT: beq t1, s5, .LBB18_96 ; RV32I-NEXT: .LBB18_81: -; RV32I-NEXT: or s3, a6, s6 -; RV32I-NEXT: or s7, s1, s5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_97 ; RV32I-NEXT: .LBB18_82: -; RV32I-NEXT: addi ra, a5, -128 -; RV32I-NEXT: mv s4, t2 -; RV32I-NEXT: mv s6, a4 -; RV32I-NEXT: beqz s2, .LBB18_84 -; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: mv s4, s3 -; RV32I-NEXT: mv s6, s7 +; RV32I-NEXT: beq t1, s0, .LBB18_98 +; RV32I-NEXT: .LBB18_83: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_99 ; RV32I-NEXT: .LBB18_84: -; RV32I-NEXT: neg s9, ra -; RV32I-NEXT: sll s3, a4, s9 -; RV32I-NEXT: bltu ra, t5, .LBB18_86 -; RV32I-NEXT: # %bb.85: -; RV32I-NEXT: sra a3, a4, ra -; RV32I-NEXT: mv s1, t2 -; RV32I-NEXT: bnez ra, .LBB18_87 -; RV32I-NEXT: j .LBB18_88 +; RV32I-NEXT: beq t1, s10, .LBB18_100 +; RV32I-NEXT: .LBB18_85: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB18_101 ; RV32I-NEXT: .LBB18_86: -; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: mv s1, t2 -; RV32I-NEXT: beqz ra, .LBB18_88 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s8, .LBB18_102 ; RV32I-NEXT: .LBB18_87: -; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: bnez t0, .LBB18_103 ; RV32I-NEXT: .LBB18_88: -; 
RV32I-NEXT: bltu ra, t5, .LBB18_90 -; RV32I-NEXT: # %bb.89: -; RV32I-NEXT: srai s2, a4, 31 -; RV32I-NEXT: srl a3, a7, ra -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: bnez ra, .LBB18_91 -; RV32I-NEXT: j .LBB18_92 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_104 +; RV32I-NEXT: .LBB18_89: +; RV32I-NEXT: beqz t1, .LBB18_105 ; RV32I-NEXT: .LBB18_90: -; RV32I-NEXT: sra s2, a4, a5 -; RV32I-NEXT: sll a3, a7, s9 -; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: beqz ra, .LBB18_92 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_106 +; RV32I-NEXT: j .LBB18_107 ; RV32I-NEXT: .LBB18_91: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne t1, s2, .LBB18_77 ; RV32I-NEXT: .LBB18_92: -; RV32I-NEXT: mv s5, t0 -; RV32I-NEXT: sub s10, t6, ra -; RV32I-NEXT: li t0, 64 -; RV32I-NEXT: bltu ra, t5, .LBB18_94 -; RV32I-NEXT: # %bb.93: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: j .LBB18_95 +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_78 +; RV32I-NEXT: .LBB18_93: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne t1, s3, .LBB18_79 ; RV32I-NEXT: .LBB18_94: -; RV32I-NEXT: srl s7, a7, a5 +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_80 ; RV32I-NEXT: .LBB18_95: -; RV32I-NEXT: mv t6, s8 -; RV32I-NEXT: mv s8, s11 -; RV32I-NEXT: bltu s10, t5, .LBB18_97 -; RV32I-NEXT: # %bb.96: -; RV32I-NEXT: li s9, 0 -; RV32I-NEXT: sll a3, t2, s10 -; RV32I-NEXT: j .LBB18_98 +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s5, .LBB18_81 +; RV32I-NEXT: .LBB18_96: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_82 ; RV32I-NEXT: .LBB18_97: -; RV32I-NEXT: sll s9, t2, s9 -; RV32I-NEXT: neg a3, s10 -; RV32I-NEXT: srl a3, t2, a3 -; RV32I-NEXT: or a3, a3, s3 +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s0, .LBB18_83 ; 
RV32I-NEXT: .LBB18_98: -; RV32I-NEXT: addi s11, ra, -64 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: beqz s10, .LBB18_100 -; RV32I-NEXT: # %bb.99: -; RV32I-NEXT: mv s3, a3 +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_84 +; RV32I-NEXT: .LBB18_99: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s10, .LBB18_85 ; RV32I-NEXT: .LBB18_100: -; RV32I-NEXT: bltu s11, t5, .LBB18_102 -; RV32I-NEXT: # %bb.101: -; RV32I-NEXT: sra a3, a4, s11 -; RV32I-NEXT: bnez s11, .LBB18_103 -; RV32I-NEXT: j .LBB18_104 +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB18_86 +; RV32I-NEXT: .LBB18_101: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s8, .LBB18_87 ; RV32I-NEXT: .LBB18_102: -; RV32I-NEXT: srl a3, t2, ra -; RV32I-NEXT: mv s10, s4 -; RV32I-NEXT: neg s4, s11 -; RV32I-NEXT: sll s4, a4, s4 -; RV32I-NEXT: or a3, a3, s4 -; RV32I-NEXT: mv s4, s10 -; RV32I-NEXT: beqz s11, .LBB18_104 +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB18_88 ; RV32I-NEXT: .LBB18_103: -; RV32I-NEXT: mv t2, a3 +; RV32I-NEXT: mv a4, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_89 ; RV32I-NEXT: .LBB18_104: -; RV32I-NEXT: bltu s11, t5, .LBB18_106 -; RV32I-NEXT: # %bb.105: -; RV32I-NEXT: srai t5, a4, 31 -; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu ra, t0, .LBB18_107 -; RV32I-NEXT: j .LBB18_108 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bnez t1, .LBB18_90 +; RV32I-NEXT: .LBB18_105: +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_107 ; RV32I-NEXT: .LBB18_106: -; RV32I-NEXT: sra t5, a4, ra -; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu ra, t0, .LBB18_108 +; RV32I-NEXT: mv a3, s11 ; RV32I-NEXT: .LBB18_107: -; RV32I-NEXT: or t2, a6, s9 -; RV32I-NEXT: or t5, s7, s3 -; RV32I-NEXT: .LBB18_108: -; RV32I-NEXT: li a6, 128 -; RV32I-NEXT: bnez ra, .LBB18_117 -; RV32I-NEXT: # %bb.109: -; RV32I-NEXT: bgeu ra, t0, .LBB18_118 +; 
RV32I-NEXT: beq t1, s2, .LBB18_121 +; RV32I-NEXT: # %bb.108: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_122 +; RV32I-NEXT: .LBB18_109: +; RV32I-NEXT: beq t1, s3, .LBB18_123 ; RV32I-NEXT: .LBB18_110: -; RV32I-NEXT: bgeu a5, a6, .LBB18_112 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_124 ; RV32I-NEXT: .LBB18_111: -; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t1, a3, s8 -; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a7, a3, t6 +; RV32I-NEXT: beq t1, s5, .LBB18_125 +; RV32I-NEXT: .LBB18_112: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_126 +; RV32I-NEXT: .LBB18_113: +; RV32I-NEXT: beq t1, s0, .LBB18_127 +; RV32I-NEXT: .LBB18_114: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s10, .LBB18_128 +; RV32I-NEXT: .LBB18_115: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s1, .LBB18_129 +; RV32I-NEXT: .LBB18_116: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s8, .LBB18_130 +; RV32I-NEXT: .LBB18_117: +; RV32I-NEXT: bnez t0, .LBB18_131 +; RV32I-NEXT: .LBB18_118: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_132 +; RV32I-NEXT: .LBB18_119: +; RV32I-NEXT: beqz t1, .LBB18_133 +; RV32I-NEXT: .LBB18_120: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB18_134 +; RV32I-NEXT: .LBB18_121: +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_109 +; RV32I-NEXT: .LBB18_122: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s3, .LBB18_110 +; RV32I-NEXT: .LBB18_123: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_111 +; RV32I-NEXT: .LBB18_124: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s5, .LBB18_112 +; RV32I-NEXT: .LBB18_125: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_113 +; RV32I-NEXT: .LBB18_126: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s0, .LBB18_114 +; RV32I-NEXT: .LBB18_127: +; RV32I-NEXT: or a5, s6, a3 +; 
RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s10, .LBB18_115 +; RV32I-NEXT: .LBB18_128: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s1, .LBB18_116 +; RV32I-NEXT: .LBB18_129: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s8, .LBB18_117 +; RV32I-NEXT: .LBB18_130: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB18_118 +; RV32I-NEXT: .LBB18_131: +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_119 +; RV32I-NEXT: .LBB18_132: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bnez t1, .LBB18_120 +; RV32I-NEXT: .LBB18_133: +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: .LBB18_134: +; RV32I-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_148 +; RV32I-NEXT: # %bb.135: +; RV32I-NEXT: beq t1, s2, .LBB18_149 +; RV32I-NEXT: .LBB18_136: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_150 +; RV32I-NEXT: .LBB18_137: +; RV32I-NEXT: beq t1, s3, .LBB18_151 +; RV32I-NEXT: .LBB18_138: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_152 +; RV32I-NEXT: .LBB18_139: +; RV32I-NEXT: beq t1, s5, .LBB18_153 +; RV32I-NEXT: .LBB18_140: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB18_154 +; RV32I-NEXT: .LBB18_141: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB18_155 +; RV32I-NEXT: .LBB18_142: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB18_156 +; RV32I-NEXT: .LBB18_143: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s8, .LBB18_157 +; RV32I-NEXT: .LBB18_144: +; RV32I-NEXT: bnez t0, .LBB18_158 +; RV32I-NEXT: .LBB18_145: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_159 +; RV32I-NEXT: .LBB18_146: +; RV32I-NEXT: beqz t1, .LBB18_160 +; RV32I-NEXT: .LBB18_147: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_161 +; RV32I-NEXT: j .LBB18_162 +; RV32I-NEXT: .LBB18_148: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne 
t1, s2, .LBB18_136 +; RV32I-NEXT: .LBB18_149: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_137 +; RV32I-NEXT: .LBB18_150: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s3, .LBB18_138 +; RV32I-NEXT: .LBB18_151: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_139 +; RV32I-NEXT: .LBB18_152: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s5, .LBB18_140 +; RV32I-NEXT: .LBB18_153: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB18_141 +; RV32I-NEXT: .LBB18_154: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB18_142 +; RV32I-NEXT: .LBB18_155: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB18_143 +; RV32I-NEXT: .LBB18_156: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s8, .LBB18_144 +; RV32I-NEXT: .LBB18_157: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB18_145 +; RV32I-NEXT: .LBB18_158: +; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_146 +; RV32I-NEXT: .LBB18_159: ; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s1, a3, s4 -; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s2, a3, s6 -; RV32I-NEXT: .LBB18_112: -; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv t0, s5 -; RV32I-NEXT: beqz a5, .LBB18_114 -; RV32I-NEXT: # %bb.113: -; RV32I-NEXT: mv s0, t1 -; RV32I-NEXT: mv a0, a7 -; RV32I-NEXT: mv t4, s1 -; RV32I-NEXT: mv t3, s2 -; RV32I-NEXT: .LBB18_114: -; RV32I-NEXT: bltu a5, a6, .LBB18_116 -; RV32I-NEXT: # %bb.115: -; RV32I-NEXT: srai a1, a4, 31 -; RV32I-NEXT: mv t0, a1 -; RV32I-NEXT: mv s11, a1 -; RV32I-NEXT: mv ra, a1 -; RV32I-NEXT: .LBB18_116: -; RV32I-NEXT: srli a4, s0, 16 -; RV32I-NEXT: lui t1, 16 -; RV32I-NEXT: srli a7, s0, 24 -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli t5, a0, 24 -; RV32I-NEXT: srli a6, t4, 16 -; RV32I-NEXT: srli s2, t4, 24 -; RV32I-NEXT: 
srli t2, t3, 16 -; RV32I-NEXT: srli s3, t3, 24 -; RV32I-NEXT: srli s1, a1, 16 -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: srli t6, t0, 16 -; RV32I-NEXT: srli s6, t0, 24 -; RV32I-NEXT: srli s5, s11, 16 -; RV32I-NEXT: srli s4, s11, 24 -; RV32I-NEXT: srli s7, ra, 16 -; RV32I-NEXT: srli s8, ra, 24 -; RV32I-NEXT: addi t1, t1, -1 -; RV32I-NEXT: and s9, s0, t1 -; RV32I-NEXT: and s10, a0, t1 +; RV32I-NEXT: bnez t1, .LBB18_147 +; RV32I-NEXT: .LBB18_160: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_162 +; RV32I-NEXT: .LBB18_161: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: .LBB18_162: +; RV32I-NEXT: beq t1, s2, .LBB18_174 +; RV32I-NEXT: # %bb.163: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_175 +; RV32I-NEXT: .LBB18_164: +; RV32I-NEXT: beq t1, s3, .LBB18_176 +; RV32I-NEXT: .LBB18_165: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s5, .LBB18_177 +; RV32I-NEXT: .LBB18_166: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s0, .LBB18_178 +; RV32I-NEXT: .LBB18_167: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s10, .LBB18_179 +; RV32I-NEXT: .LBB18_168: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s1, .LBB18_180 +; RV32I-NEXT: .LBB18_169: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s8, .LBB18_181 +; RV32I-NEXT: .LBB18_170: +; RV32I-NEXT: bnez t0, .LBB18_182 +; RV32I-NEXT: .LBB18_171: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_183 +; RV32I-NEXT: .LBB18_172: +; RV32I-NEXT: beqz t1, .LBB18_184 +; RV32I-NEXT: .LBB18_173: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_185 +; RV32I-NEXT: j .LBB18_186 +; RV32I-NEXT: .LBB18_174: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_164 +; RV32I-NEXT: .LBB18_175: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s3, .LBB18_165 +; RV32I-NEXT: .LBB18_176: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s5, .LBB18_166 +; RV32I-NEXT: .LBB18_177: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv 
a5, t2 +; RV32I-NEXT: beq t1, s0, .LBB18_167 +; RV32I-NEXT: .LBB18_178: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s10, .LBB18_168 +; RV32I-NEXT: .LBB18_179: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s1, .LBB18_169 +; RV32I-NEXT: .LBB18_180: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s8, .LBB18_170 +; RV32I-NEXT: .LBB18_181: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB18_171 +; RV32I-NEXT: .LBB18_182: +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_172 +; RV32I-NEXT: .LBB18_183: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bnez t1, .LBB18_173 +; RV32I-NEXT: .LBB18_184: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_186 +; RV32I-NEXT: .LBB18_185: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: .LBB18_186: +; RV32I-NEXT: beq t1, s2, .LBB18_197 +; RV32I-NEXT: # %bb.187: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s3, .LBB18_198 +; RV32I-NEXT: .LBB18_188: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s5, .LBB18_199 +; RV32I-NEXT: .LBB18_189: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB18_200 +; RV32I-NEXT: .LBB18_190: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB18_201 +; RV32I-NEXT: .LBB18_191: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB18_202 +; RV32I-NEXT: .LBB18_192: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s8, .LBB18_203 +; RV32I-NEXT: .LBB18_193: +; RV32I-NEXT: bnez t0, .LBB18_204 +; RV32I-NEXT: .LBB18_194: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB18_205 +; RV32I-NEXT: .LBB18_195: +; RV32I-NEXT: beqz t1, .LBB18_206 +; RV32I-NEXT: .LBB18_196: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s2, .LBB18_207 +; RV32I-NEXT: j .LBB18_208 +; RV32I-NEXT: .LBB18_197: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s3, .LBB18_188 +; RV32I-NEXT: .LBB18_198: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; 
RV32I-NEXT: beq t1, s5, .LBB18_189 +; RV32I-NEXT: .LBB18_199: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB18_190 +; RV32I-NEXT: .LBB18_200: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB18_191 +; RV32I-NEXT: .LBB18_201: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB18_192 +; RV32I-NEXT: .LBB18_202: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s8, .LBB18_193 +; RV32I-NEXT: .LBB18_203: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB18_194 +; RV32I-NEXT: .LBB18_204: +; RV32I-NEXT: mv t3, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB18_195 +; RV32I-NEXT: .LBB18_205: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bnez t1, .LBB18_196 +; RV32I-NEXT: .LBB18_206: +; RV32I-NEXT: or a3, s6, a3 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s2, .LBB18_208 +; RV32I-NEXT: .LBB18_207: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: .LBB18_208: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s3, .LBB18_217 +; RV32I-NEXT: # %bb.209: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s5, .LBB18_218 +; RV32I-NEXT: .LBB18_210: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB18_219 +; RV32I-NEXT: .LBB18_211: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB18_220 +; RV32I-NEXT: .LBB18_212: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB18_221 +; RV32I-NEXT: .LBB18_213: +; RV32I-NEXT: bne t1, s8, .LBB18_222 +; RV32I-NEXT: .LBB18_214: +; RV32I-NEXT: beqz t0, .LBB18_216 +; RV32I-NEXT: .LBB18_215: +; RV32I-NEXT: mv a1, t2 +; RV32I-NEXT: .LBB18_216: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: srli a5, ra, 16 +; RV32I-NEXT: lui t4, 16 +; RV32I-NEXT: srli t2, ra, 24 +; RV32I-NEXT: srli t0, s11, 16 +; RV32I-NEXT: srli t6, s11, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli s2, a4, 24 +; RV32I-NEXT: srli t5, a6, 16 +; RV32I-NEXT: srli s3, a6, 24 +; RV32I-NEXT: srli s1, a7, 16 +; RV32I-NEXT: srli a3, a7, 24 
+; RV32I-NEXT: srli s0, a0, 16 +; RV32I-NEXT: srli s5, a0, 24 +; RV32I-NEXT: srli s4, t3, 16 +; RV32I-NEXT: srli s6, t3, 24 +; RV32I-NEXT: srli s7, a1, 16 +; RV32I-NEXT: srli s8, a1, 24 +; RV32I-NEXT: addi t4, t4, -1 +; RV32I-NEXT: and s9, ra, t4 +; RV32I-NEXT: and s10, s11, t4 ; RV32I-NEXT: srli s9, s9, 8 -; RV32I-NEXT: sb s0, 0(a2) +; RV32I-NEXT: sb ra, 0(a2) ; RV32I-NEXT: sb s9, 1(a2) -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb a7, 3(a2) -; RV32I-NEXT: and a4, t4, t1 -; RV32I-NEXT: srli a7, s10, 8 -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: sb a7, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t5, 7(a2) -; RV32I-NEXT: and a0, t3, t1 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t4, 8(a2) -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb a6, 10(a2) +; RV32I-NEXT: sb a5, 2(a2) +; RV32I-NEXT: sb t2, 3(a2) +; RV32I-NEXT: and a5, a4, t4 +; RV32I-NEXT: srli t2, s10, 8 +; RV32I-NEXT: sb s11, 4(a2) +; RV32I-NEXT: sb t2, 5(a2) +; RV32I-NEXT: sb t0, 6(a2) +; RV32I-NEXT: sb t6, 7(a2) +; RV32I-NEXT: and t0, a6, t4 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb t1, 10(a2) ; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, a1, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb t3, 12(a2) -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb t2, 14(a2) +; RV32I-NEXT: and a4, a7, t4 +; RV32I-NEXT: srli a5, t0, 8 +; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb a5, 13(a2) +; RV32I-NEXT: sb t5, 14(a2) ; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a0, t0, t1 +; RV32I-NEXT: and a5, a0, t4 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a1, 16(a2) +; RV32I-NEXT: sb a7, 16(a2) ; RV32I-NEXT: sb a4, 17(a2) ; RV32I-NEXT: sb s1, 18(a2) ; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a1, s11, t1 -; RV32I-NEXT: and a3, ra, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: and a3, t3, t4 +; RV32I-NEXT: and a4, a1, t4 +; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb t0, 20(a2) -; 
RV32I-NEXT: sb a0, 21(a2) -; RV32I-NEXT: sb t6, 22(a2) -; RV32I-NEXT: sb s6, 23(a2) -; RV32I-NEXT: sb s11, 24(a2) -; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s5, 26(a2) -; RV32I-NEXT: sb s4, 27(a2) -; RV32I-NEXT: sb ra, 28(a2) -; RV32I-NEXT: sb a3, 29(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a0, 20(a2) +; RV32I-NEXT: sb a5, 21(a2) +; RV32I-NEXT: sb s0, 22(a2) +; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb t3, 24(a2) +; RV32I-NEXT: sb a3, 25(a2) +; RV32I-NEXT: sb s4, 26(a2) +; RV32I-NEXT: sb s6, 27(a2) +; RV32I-NEXT: sb a1, 28(a2) +; RV32I-NEXT: sb a4, 29(a2) ; RV32I-NEXT: sb s7, 30(a2) ; RV32I-NEXT: sb s8, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 28(sp) 
# 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB18_117: -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: mv a7, t5 -; RV32I-NEXT: bltu ra, t0, .LBB18_110 -; RV32I-NEXT: .LBB18_118: -; RV32I-NEXT: srai s1, a4, 31 -; RV32I-NEXT: mv s2, s1 -; RV32I-NEXT: bltu a5, a6, .LBB18_111 -; RV32I-NEXT: j .LBB18_112 +; RV32I-NEXT: .LBB18_217: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s5, .LBB18_210 +; RV32I-NEXT: .LBB18_218: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB18_211 +; RV32I-NEXT: .LBB18_219: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB18_212 +; RV32I-NEXT: .LBB18_220: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB18_213 +; RV32I-NEXT: .LBB18_221: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beq t1, s8, .LBB18_214 +; RV32I-NEXT: .LBB18_222: +; RV32I-NEXT: mv t2, a3 +; RV32I-NEXT: bnez t0, .LBB18_215 +; RV32I-NEXT: j .LBB18_216 %src = load i256, ptr %src.ptr, align 1 %byteOff = load i256, ptr %byteOff.ptr, align 1 %bitOff = shl i256 %byteOff, 3 @@ -9431,617 +11007,893 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; ; RV32I-LABEL: ashr_32bytes_wordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: 
lbu a3, 16(a0) -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a5, 18(a0) -; RV32I-NEXT: lbu a6, 19(a0) -; RV32I-NEXT: lbu a7, 20(a0) -; RV32I-NEXT: lbu t0, 21(a0) -; RV32I-NEXT: lbu t1, 22(a0) -; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu t0, 2(a0) +; RV32I-NEXT: lbu t1, 3(a0) +; RV32I-NEXT: lbu t2, 4(a0) +; RV32I-NEXT: lbu t3, 5(a0) +; RV32I-NEXT: lbu t4, 6(a0) +; RV32I-NEXT: lbu t5, 7(a0) +; RV32I-NEXT: lbu t6, 8(a0) +; RV32I-NEXT: lbu s0, 9(a0) +; RV32I-NEXT: lbu s1, 10(a0) +; RV32I-NEXT: lbu s2, 11(a0) ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t3, t4, t3 -; RV32I-NEXT: or t4, t6, t5 -; RV32I-NEXT: or t0, t0, a7 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t5, 
1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: or a4, a4, a3 +; RV32I-NEXT: lbu a7, 13(a0) +; RV32I-NEXT: lbu a6, 14(a0) +; RV32I-NEXT: lbu a3, 15(a0) +; RV32I-NEXT: lbu s3, 28(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 30(a0) +; RV32I-NEXT: lbu t5, 31(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: slli s4, t2, 8 +; RV32I-NEXT: or t2, s0, t6 +; RV32I-NEXT: or s0, s2, s1 +; RV32I-NEXT: or s1, s4, s3 +; RV32I-NEXT: lbu t6, 0(a1) +; RV32I-NEXT: lbu s2, 1(a1) +; RV32I-NEXT: lbu s3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s0, t5, a7 +; RV32I-NEXT: or s4, t5, t3 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or s2, s2, t6 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 -; RV32I-NEXT: li t5, 32 -; RV32I-NEXT: slli a7, a4, 16 -; RV32I-NEXT: slli a1, a5, 16 +; RV32I-NEXT: or a1, a1, s3 +; RV32I-NEXT: slli t3, a3, 8 +; RV32I-NEXT: slli t6, t0, 16 ; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: slli a5, t2, 16 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or a4, t1, t0 -; RV32I-NEXT: or a5, a5, s0 -; RV32I-NEXT: slli a5, a5, 5 -; RV32I-NEXT: srl s0, t2, a5 -; RV32I-NEXT: neg s6, a5 -; RV32I-NEXT: sll s1, a4, s6 -; RV32I-NEXT: bltu a5, t5, .LBB19_2 +; RV32I-NEXT: slli t5, s0, 16 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: slli a3, a1, 16 +; RV32I-NEXT: or s5, t4, t1 +; RV32I-NEXT: or a1, s4, s1 +; RV32I-NEXT: or t0, a3, s2 +; RV32I-NEXT: slli t0, t0, 5 +; RV32I-NEXT: srli t1, t0, 5 +; RV32I-NEXT: andi t4, t0, 31 +; RV32I-NEXT: neg a3, t4 +; RV32I-NEXT: beqz t4, .LBB19_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra t0, a4, a5 -; RV32I-NEXT: j .LBB19_3 +; RV32I-NEXT: sll a5, s5, a3 ; RV32I-NEXT: .LBB19_2: -; RV32I-NEXT: or t0, 
s0, s1 -; RV32I-NEXT: .LBB19_3: -; RV32I-NEXT: or t1, a7, a3 -; RV32I-NEXT: or a7, a1, a6 -; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beqz a5, .LBB19_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv t3, t0 +; RV32I-NEXT: or s10, t6, a4 +; RV32I-NEXT: lbu t6, 12(a0) +; RV32I-NEXT: lbu s0, 19(a0) +; RV32I-NEXT: slli s1, a7, 8 +; RV32I-NEXT: or a6, t3, a6 +; RV32I-NEXT: or a4, t5, t2 +; RV32I-NEXT: srai t2, a1, 31 +; RV32I-NEXT: beqz t1, .LBB19_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB19_5 +; RV32I-NEXT: .LBB19_4: +; RV32I-NEXT: srl a7, s10, t0 +; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: .LBB19_5: -; RV32I-NEXT: srl a3, t1, a5 -; RV32I-NEXT: sll a1, a7, s6 -; RV32I-NEXT: sw a1, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu a5, t5, .LBB19_7 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu s3, 17(a0) +; RV32I-NEXT: lbu t3, 18(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or s4, s1, t6 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: li s6, 1 +; RV32I-NEXT: sll s2, a4, a3 +; RV32I-NEXT: beqz t4, .LBB19_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srai t4, a4, 31 -; RV32I-NEXT: srl a1, a7, a5 -; RV32I-NEXT: j .LBB19_8 +; RV32I-NEXT: mv a7, s2 ; RV32I-NEXT: .LBB19_7: -; RV32I-NEXT: sra t4, a4, a5 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: .LBB19_8: -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: mv t0, t1 -; RV32I-NEXT: beqz a5, .LBB19_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t0, a1 -; RV32I-NEXT: .LBB19_10: -; RV32I-NEXT: sub s7, t6, a5 -; RV32I-NEXT: bltu a5, t5, .LBB19_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: j .LBB19_13 -; RV32I-NEXT: .LBB19_12: -; RV32I-NEXT: srl a1, a7, a5 +; RV32I-NEXT: lbu t5, 16(a0) +; RV32I-NEXT: lbu t6, 23(a0) +; RV32I-NEXT: slli s1, s3, 8 +; RV32I-NEXT: or s0, s0, t3 +; RV32I-NEXT: srl s3, s5, t0 +; RV32I-NEXT: or a6, a6, s4 +; RV32I-NEXT: bne t1, s6, .LBB19_9 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: or a5, s3, a7 +; RV32I-NEXT: .LBB19_9: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s6, 21(a0) +; RV32I-NEXT: lbu 
a7, 22(a0) +; RV32I-NEXT: slli s4, t6, 8 +; RV32I-NEXT: or s7, s1, t5 +; RV32I-NEXT: slli s8, s0, 16 +; RV32I-NEXT: li s9, 2 +; RV32I-NEXT: sll s0, a6, a3 +; RV32I-NEXT: beqz t4, .LBB19_11 +; RV32I-NEXT: # %bb.10: +; RV32I-NEXT: mv t3, s0 +; RV32I-NEXT: .LBB19_11: +; RV32I-NEXT: lbu t5, 20(a0) +; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: slli s6, s6, 8 +; RV32I-NEXT: or s4, s4, a7 +; RV32I-NEXT: srl s1, a4, t0 +; RV32I-NEXT: or a7, s8, s7 +; RV32I-NEXT: bne t1, s9, .LBB19_13 +; RV32I-NEXT: # %bb.12: +; RV32I-NEXT: or a5, s1, t3 ; RV32I-NEXT: .LBB19_13: -; RV32I-NEXT: sw a3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: neg s10, s7 -; RV32I-NEXT: bltu s7, t5, .LBB19_15 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s8, 25(a0) +; RV32I-NEXT: lbu s7, 26(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: or s6, s6, t5 +; RV32I-NEXT: slli s9, s4, 16 +; RV32I-NEXT: li s11, 3 +; RV32I-NEXT: sll t5, a7, a3 +; RV32I-NEXT: beqz t4, .LBB19_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: sll a3, t2, s7 -; RV32I-NEXT: j .LBB19_16 +; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB19_15: -; RV32I-NEXT: sll a6, t2, s6 -; RV32I-NEXT: srl a3, t2, s10 -; RV32I-NEXT: or a3, a3, s1 -; RV32I-NEXT: .LBB19_16: -; RV32I-NEXT: sw t3, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi s9, a5, -64 -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz s7, .LBB19_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv t3, a3 -; RV32I-NEXT: .LBB19_18: -; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t5, .LBB19_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: sra s0, a4, s9 -; RV32I-NEXT: j .LBB19_21 -; RV32I-NEXT: .LBB19_20: -; RV32I-NEXT: sll a3, a4, s11 -; RV32I-NEXT: or s0, s0, a3 +; RV32I-NEXT: lbu s4, 24(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or s7, t6, s7 +; RV32I-NEXT: srl t6, a6, t0 +; RV32I-NEXT: or a0, s9, s6 +; RV32I-NEXT: sw s5, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: bne t1, s11, .LBB19_17 
+; RV32I-NEXT: # %bb.16: +; RV32I-NEXT: or a5, t6, t3 +; RV32I-NEXT: .LBB19_17: +; RV32I-NEXT: li s6, 0 +; RV32I-NEXT: or t3, s8, s4 +; RV32I-NEXT: slli s7, s7, 16 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: sll s11, a0, a3 +; RV32I-NEXT: beqz t4, .LBB19_19 +; RV32I-NEXT: # %bb.18: +; RV32I-NEXT: mv s6, s11 +; RV32I-NEXT: .LBB19_19: +; RV32I-NEXT: srl s4, a7, t0 +; RV32I-NEXT: or t3, s7, t3 +; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: bne t1, s10, .LBB19_21 +; RV32I-NEXT: # %bb.20: +; RV32I-NEXT: or a5, s4, s6 ; RV32I-NEXT: .LBB19_21: -; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s3, 11(a0) -; RV32I-NEXT: lbu a3, 15(a0) -; RV32I-NEXT: mv t4, t2 -; RV32I-NEXT: beqz s9, .LBB19_23 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s5, 5 +; RV32I-NEXT: sll s6, t3, a3 +; RV32I-NEXT: sw s6, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t4, .LBB19_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv t4, s0 +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB19_23: -; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) -; RV32I-NEXT: lbu s8, 13(a0) -; RV32I-NEXT: lbu ra, 14(a0) -; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: bltu s9, t5, .LBB19_25 +; RV32I-NEXT: srl s6, a0, t0 +; RV32I-NEXT: beq t1, s5, .LBB19_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: srai s0, a4, 31 +; RV32I-NEXT: mv ra, s6 ; RV32I-NEXT: j .LBB19_26 ; RV32I-NEXT: .LBB19_25: -; RV32I-NEXT: sra s0, a4, a5 +; RV32I-NEXT: mv ra, s6 +; RV32I-NEXT: or a5, s6, s4 ; RV32I-NEXT: .LBB19_26: -; RV32I-NEXT: or s1, s3, s1 -; RV32I-NEXT: lbu s5, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, s8, 8 -; RV32I-NEXT: or s8, a3, ra -; RV32I-NEXT: bgeu a5, t6, .LBB19_28 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: sll s7, a1, a3 +; RV32I-NEXT: beqz t4, .LBB19_28 ; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: or t4, t0, a6 -; RV32I-NEXT: or s0, a1, 
t3 +; RV32I-NEXT: mv s4, s7 ; RV32I-NEXT: .LBB19_28: -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: or a6, s2, s5 -; RV32I-NEXT: slli s2, s1, 16 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: mv a1, t1 -; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: beqz a5, .LBB19_30 +; RV32I-NEXT: srl s5, t3, t0 +; RV32I-NEXT: beq t1, s8, .LBB19_30 ; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv a1, t4 -; RV32I-NEXT: mv t0, s0 +; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: j .LBB19_31 ; RV32I-NEXT: .LBB19_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; RV32I-NEXT: lbu a3, 2(a0) -; RV32I-NEXT: lbu s3, 5(a0) -; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s4, t3, 8 -; RV32I-NEXT: or t4, s2, a6 -; RV32I-NEXT: or t3, s8, s1 -; RV32I-NEXT: bltu a5, t6, .LBB19_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: srai a6, a4, 31 -; RV32I-NEXT: sw a6, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a6, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB19_32: -; RV32I-NEXT: slli a6, ra, 8 -; RV32I-NEXT: or a3, s5, a3 -; RV32I-NEXT: lbu s1, 0(a0) -; RV32I-NEXT: lbu a0, 4(a0) -; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: or s0, s4, s0 -; RV32I-NEXT: srl s2, t4, a5 -; RV32I-NEXT: sll ra, t3, s6 -; RV32I-NEXT: bltu a5, t5, .LBB19_34 -; RV32I-NEXT: # %bb.33: -; RV32I-NEXT: srl s4, t3, a5 -; RV32I-NEXT: j .LBB19_35 -; RV32I-NEXT: .LBB19_34: -; RV32I-NEXT: or s4, s2, ra +; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: or a5, s5, s4 +; RV32I-NEXT: .LBB19_31: +; RV32I-NEXT: li s5, 0 +; RV32I-NEXT: li s6, 7 +; RV32I-NEXT: sll s4, t2, a3 +; RV32I-NEXT: beqz t4, .LBB19_33 +; RV32I-NEXT: # %bb.32: +; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: .LBB19_33: +; RV32I-NEXT: srl a3, a1, t0 +; RV32I-NEXT: bne t1, s6, .LBB19_35 +; RV32I-NEXT: # %bb.34: +; RV32I-NEXT: or a5, a3, s5 ; RV32I-NEXT: .LBB19_35: -; RV32I-NEXT: or a6, a6, s1 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: or a0, s3, a0 -; RV32I-NEXT: slli s1, s0, 16 -; RV32I-NEXT: mv s5, t4 -; RV32I-NEXT: beqz a5, .LBB19_37 +; 
RV32I-NEXT: li s5, 3 +; RV32I-NEXT: mv s6, a3 +; RV32I-NEXT: bnez t0, .LBB19_39 ; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_40 ; RV32I-NEXT: .LBB19_37: -; RV32I-NEXT: or s0, a3, a6 -; RV32I-NEXT: or a0, s1, a0 -; RV32I-NEXT: bltu a5, t5, .LBB19_39 -; RV32I-NEXT: # %bb.38: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: srl a3, a0, a5 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: bnez a5, .LBB19_40 -; RV32I-NEXT: j .LBB19_41 +; RV32I-NEXT: beqz t1, .LBB19_41 +; RV32I-NEXT: .LBB19_38: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB19_42 ; RV32I-NEXT: .LBB19_39: -; RV32I-NEXT: srl s4, t3, a5 -; RV32I-NEXT: srl a3, s0, a5 -; RV32I-NEXT: sll a6, a0, s6 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: beqz a5, .LBB19_41 +; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_37 ; RV32I-NEXT: .LBB19_40: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: bnez t1, .LBB19_38 ; RV32I-NEXT: .LBB19_41: -; RV32I-NEXT: bltu a5, t5, .LBB19_44 -; RV32I-NEXT: # %bb.42: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: bgeu s7, t5, .LBB19_45 -; RV32I-NEXT: .LBB19_43: -; RV32I-NEXT: sll s3, t4, s6 -; RV32I-NEXT: srl a3, t4, s10 -; RV32I-NEXT: or a3, a3, ra -; RV32I-NEXT: mv s10, t3 -; RV32I-NEXT: bnez s7, .LBB19_46 -; RV32I-NEXT: j .LBB19_47 +; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: .LBB19_42: +; RV32I-NEXT: li s2, 1 +; RV32I-NEXT: li s3, 2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_61 +; RV32I-NEXT: # %bb.43: +; RV32I-NEXT: beq t1, s2, .LBB19_62 ; RV32I-NEXT: .LBB19_44: -; RV32I-NEXT: srl s1, a0, a5 -; RV32I-NEXT: bltu s7, t5, .LBB19_43 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_63 ; RV32I-NEXT: .LBB19_45: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t4, s7 -; RV32I-NEXT: mv s10, t3 -; RV32I-NEXT: beqz s7, .LBB19_47 +; RV32I-NEXT: beq t1, s3, .LBB19_64 ; RV32I-NEXT: .LBB19_46: -; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: 
bnez t4, .LBB19_65 ; RV32I-NEXT: .LBB19_47: -; RV32I-NEXT: bltu s9, t5, .LBB19_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: srl a3, t3, s9 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: bnez s9, .LBB19_50 -; RV32I-NEXT: j .LBB19_51 +; RV32I-NEXT: beq t1, s5, .LBB19_66 +; RV32I-NEXT: .LBB19_48: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_67 ; RV32I-NEXT: .LBB19_49: -; RV32I-NEXT: sll a3, t3, s11 -; RV32I-NEXT: or a3, s2, a3 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: beqz s9, .LBB19_51 +; RV32I-NEXT: bne t1, s10, .LBB19_51 ; RV32I-NEXT: .LBB19_50: -; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: or a5, ra, a3 ; RV32I-NEXT: .LBB19_51: -; RV32I-NEXT: bltu s9, t5, .LBB19_53 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li s10, 5 +; RV32I-NEXT: bnez t4, .LBB19_68 ; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bltu a5, t6, .LBB19_54 -; RV32I-NEXT: j .LBB19_55 +; RV32I-NEXT: beq t1, s10, .LBB19_69 ; RV32I-NEXT: .LBB19_53: -; RV32I-NEXT: srl s7, t3, a5 -; RV32I-NEXT: bgeu a5, t6, .LBB19_55 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_70 ; RV32I-NEXT: .LBB19_54: -; RV32I-NEXT: or s2, a6, s3 -; RV32I-NEXT: or s7, s1, s10 +; RV32I-NEXT: bne t1, s8, .LBB19_56 ; RV32I-NEXT: .LBB19_55: -; RV32I-NEXT: li a3, 128 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: beqz a5, .LBB19_57 -; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv a6, s2 -; RV32I-NEXT: mv s1, s7 -; RV32I-NEXT: .LBB19_57: -; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s2, a3, a5 -; RV32I-NEXT: bltu a5, t6, .LBB19_59 -; RV32I-NEXT: # %bb.58: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: .LBB19_56: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: li s8, 7 +; RV32I-NEXT: bne t1, s8, .LBB19_71 +; RV32I-NEXT: # %bb.57: +; RV32I-NEXT: bnez t0, .LBB19_72 +; RV32I-NEXT: .LBB19_58: +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: bnez t4, .LBB19_73 ; RV32I-NEXT: .LBB19_59: -; RV32I-NEXT: neg s3, s2 -; RV32I-NEXT: srl a6, t1, s3 -; RV32I-NEXT: sw s4, 20(sp) # 
4-byte Folded Spill -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t5, .LBB19_61 -; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: li s11, 0 -; RV32I-NEXT: sll a3, t1, s2 -; RV32I-NEXT: j .LBB19_62 +; RV32I-NEXT: beqz t1, .LBB19_74 +; RV32I-NEXT: .LBB19_60: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB19_75 ; RV32I-NEXT: .LBB19_61: -; RV32I-NEXT: sll s11, t1, s6 -; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: bne t1, s2, .LBB19_44 ; RV32I-NEXT: .LBB19_62: -; RV32I-NEXT: sub s1, t6, s2 -; RV32I-NEXT: mv s8, a7 -; RV32I-NEXT: beqz s2, .LBB19_64 -; RV32I-NEXT: # %bb.63: -; RV32I-NEXT: mv s8, a3 +; RV32I-NEXT: or a5, s1, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_45 +; RV32I-NEXT: .LBB19_63: +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne t1, s3, .LBB19_46 ; RV32I-NEXT: .LBB19_64: -; RV32I-NEXT: bltu s1, t5, .LBB19_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: srl a3, a7, s1 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: bnez s1, .LBB19_67 -; RV32I-NEXT: j .LBB19_68 +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_47 +; RV32I-NEXT: .LBB19_65: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne t1, s5, .LBB19_48 ; RV32I-NEXT: .LBB19_66: -; RV32I-NEXT: neg a3, s1 -; RV32I-NEXT: sll a3, a7, a3 -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: beqz s1, .LBB19_68 +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_49 ; RV32I-NEXT: .LBB19_67: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: beq t1, s10, .LBB19_50 +; RV32I-NEXT: j .LBB19_51 ; RV32I-NEXT: .LBB19_68: -; RV32I-NEXT: bltu s1, t5, .LBB19_71 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s2, t5, .LBB19_72 +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s10, .LBB19_53 +; RV32I-NEXT: 
.LBB19_69: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_54 ; RV32I-NEXT: .LBB19_70: -; RV32I-NEXT: sll s6, t2, s6 -; RV32I-NEXT: srl a3, t2, s3 -; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB19_73 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: beq t1, s8, .LBB19_55 +; RV32I-NEXT: j .LBB19_56 ; RV32I-NEXT: .LBB19_71: -; RV32I-NEXT: srl s1, a7, s3 -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t5, .LBB19_70 +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB19_58 ; RV32I-NEXT: .LBB19_72: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: sll a3, t2, s2 +; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: beqz t4, .LBB19_59 ; RV32I-NEXT: .LBB19_73: -; RV32I-NEXT: addi s9, s2, -64 -; RV32I-NEXT: mv s5, a4 -; RV32I-NEXT: beqz s2, .LBB19_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: mv a5, s0 +; RV32I-NEXT: bnez t1, .LBB19_60 +; RV32I-NEXT: .LBB19_74: +; RV32I-NEXT: or a5, s1, a5 ; RV32I-NEXT: .LBB19_75: -; RV32I-NEXT: bltu s9, t5, .LBB19_77 +; RV32I-NEXT: li s0, 4 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_91 ; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t1, s9 -; RV32I-NEXT: mv s7, a7 -; RV32I-NEXT: bnez s9, .LBB19_78 -; RV32I-NEXT: j .LBB19_79 +; RV32I-NEXT: beq t1, s2, .LBB19_92 ; RV32I-NEXT: .LBB19_77: -; RV32I-NEXT: sll s3, t1, s2 -; RV32I-NEXT: neg a3, s9 -; RV32I-NEXT: srl a3, t1, a3 -; RV32I-NEXT: sll s4, a7, s2 -; RV32I-NEXT: or a3, a3, s4 -; RV32I-NEXT: mv s7, a7 -; RV32I-NEXT: beqz s9, .LBB19_79 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_93 ; RV32I-NEXT: .LBB19_78: -; RV32I-NEXT: mv s7, a3 +; RV32I-NEXT: beq t1, s3, .LBB19_94 ; RV32I-NEXT: .LBB19_79: -; RV32I-NEXT: bltu s2, t6, .LBB19_81 -; RV32I-NEXT: # %bb.80: -; RV32I-NEXT: li s11, 0 -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: j .LBB19_82 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: 
bnez t4, .LBB19_95 +; RV32I-NEXT: .LBB19_80: +; RV32I-NEXT: beq t1, s5, .LBB19_96 ; RV32I-NEXT: .LBB19_81: -; RV32I-NEXT: or s3, a6, s6 -; RV32I-NEXT: or s7, s1, s5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_97 ; RV32I-NEXT: .LBB19_82: -; RV32I-NEXT: addi ra, a5, -128 -; RV32I-NEXT: mv s4, t2 -; RV32I-NEXT: mv s6, a4 -; RV32I-NEXT: beqz s2, .LBB19_84 -; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: mv s4, s3 -; RV32I-NEXT: mv s6, s7 +; RV32I-NEXT: beq t1, s0, .LBB19_98 +; RV32I-NEXT: .LBB19_83: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_99 ; RV32I-NEXT: .LBB19_84: -; RV32I-NEXT: neg s9, ra -; RV32I-NEXT: sll s3, a4, s9 -; RV32I-NEXT: bltu ra, t5, .LBB19_86 -; RV32I-NEXT: # %bb.85: -; RV32I-NEXT: sra a3, a4, ra -; RV32I-NEXT: mv s1, t2 -; RV32I-NEXT: bnez ra, .LBB19_87 -; RV32I-NEXT: j .LBB19_88 +; RV32I-NEXT: beq t1, s10, .LBB19_100 +; RV32I-NEXT: .LBB19_85: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB19_101 ; RV32I-NEXT: .LBB19_86: -; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: mv s1, t2 -; RV32I-NEXT: beqz ra, .LBB19_88 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s8, .LBB19_102 ; RV32I-NEXT: .LBB19_87: -; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: bnez t0, .LBB19_103 ; RV32I-NEXT: .LBB19_88: -; RV32I-NEXT: bltu ra, t5, .LBB19_90 -; RV32I-NEXT: # %bb.89: -; RV32I-NEXT: srai s2, a4, 31 -; RV32I-NEXT: srl a3, a7, ra -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: bnez ra, .LBB19_91 -; RV32I-NEXT: j .LBB19_92 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_104 +; RV32I-NEXT: .LBB19_89: +; RV32I-NEXT: beqz t1, .LBB19_105 ; RV32I-NEXT: .LBB19_90: -; RV32I-NEXT: sra s2, a4, a5 -; RV32I-NEXT: sll a3, a7, s9 -; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: beqz ra, .LBB19_92 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_106 +; RV32I-NEXT: j .LBB19_107 ; RV32I-NEXT: .LBB19_91: -; RV32I-NEXT: mv 
a6, a3 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne t1, s2, .LBB19_77 ; RV32I-NEXT: .LBB19_92: -; RV32I-NEXT: mv s5, t0 -; RV32I-NEXT: sub s10, t6, ra -; RV32I-NEXT: li t0, 64 -; RV32I-NEXT: bltu ra, t5, .LBB19_94 -; RV32I-NEXT: # %bb.93: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: j .LBB19_95 +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_78 +; RV32I-NEXT: .LBB19_93: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne t1, s3, .LBB19_79 ; RV32I-NEXT: .LBB19_94: -; RV32I-NEXT: srl s7, a7, a5 +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_80 ; RV32I-NEXT: .LBB19_95: -; RV32I-NEXT: mv t6, s8 -; RV32I-NEXT: mv s8, s11 -; RV32I-NEXT: bltu s10, t5, .LBB19_97 -; RV32I-NEXT: # %bb.96: -; RV32I-NEXT: li s9, 0 -; RV32I-NEXT: sll a3, t2, s10 -; RV32I-NEXT: j .LBB19_98 +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s5, .LBB19_81 +; RV32I-NEXT: .LBB19_96: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_82 ; RV32I-NEXT: .LBB19_97: -; RV32I-NEXT: sll s9, t2, s9 -; RV32I-NEXT: neg a3, s10 -; RV32I-NEXT: srl a3, t2, a3 -; RV32I-NEXT: or a3, a3, s3 +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s0, .LBB19_83 ; RV32I-NEXT: .LBB19_98: -; RV32I-NEXT: addi s11, ra, -64 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: beqz s10, .LBB19_100 -; RV32I-NEXT: # %bb.99: -; RV32I-NEXT: mv s3, a3 +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_84 +; RV32I-NEXT: .LBB19_99: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s10, .LBB19_85 ; RV32I-NEXT: .LBB19_100: -; RV32I-NEXT: bltu s11, t5, .LBB19_102 -; RV32I-NEXT: # %bb.101: -; RV32I-NEXT: sra a3, a4, s11 -; RV32I-NEXT: bnez s11, .LBB19_103 -; RV32I-NEXT: j .LBB19_104 +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB19_86 +; RV32I-NEXT: .LBB19_101: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; 
RV32I-NEXT: beq t1, s8, .LBB19_87 ; RV32I-NEXT: .LBB19_102: -; RV32I-NEXT: srl a3, t2, ra -; RV32I-NEXT: mv s10, s4 -; RV32I-NEXT: neg s4, s11 -; RV32I-NEXT: sll s4, a4, s4 -; RV32I-NEXT: or a3, a3, s4 -; RV32I-NEXT: mv s4, s10 -; RV32I-NEXT: beqz s11, .LBB19_104 +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB19_88 ; RV32I-NEXT: .LBB19_103: -; RV32I-NEXT: mv t2, a3 +; RV32I-NEXT: mv a4, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_89 ; RV32I-NEXT: .LBB19_104: -; RV32I-NEXT: bltu s11, t5, .LBB19_106 -; RV32I-NEXT: # %bb.105: -; RV32I-NEXT: srai t5, a4, 31 -; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu ra, t0, .LBB19_107 -; RV32I-NEXT: j .LBB19_108 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bnez t1, .LBB19_90 +; RV32I-NEXT: .LBB19_105: +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_107 ; RV32I-NEXT: .LBB19_106: -; RV32I-NEXT: sra t5, a4, ra -; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu ra, t0, .LBB19_108 +; RV32I-NEXT: mv a3, s11 ; RV32I-NEXT: .LBB19_107: -; RV32I-NEXT: or t2, a6, s9 -; RV32I-NEXT: or t5, s7, s3 -; RV32I-NEXT: .LBB19_108: -; RV32I-NEXT: li a6, 128 -; RV32I-NEXT: bnez ra, .LBB19_117 -; RV32I-NEXT: # %bb.109: -; RV32I-NEXT: bgeu ra, t0, .LBB19_118 +; RV32I-NEXT: beq t1, s2, .LBB19_121 +; RV32I-NEXT: # %bb.108: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_122 +; RV32I-NEXT: .LBB19_109: +; RV32I-NEXT: beq t1, s3, .LBB19_123 ; RV32I-NEXT: .LBB19_110: -; RV32I-NEXT: bgeu a5, a6, .LBB19_112 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_124 ; RV32I-NEXT: .LBB19_111: -; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t1, a3, s8 -; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a7, a3, t6 -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s1, a3, s4 -; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s2, a3, s6 +; RV32I-NEXT: beq t1, s5, .LBB19_125 ; RV32I-NEXT: 
.LBB19_112: -; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv t0, s5 -; RV32I-NEXT: beqz a5, .LBB19_114 -; RV32I-NEXT: # %bb.113: -; RV32I-NEXT: mv s0, t1 -; RV32I-NEXT: mv a0, a7 -; RV32I-NEXT: mv t4, s1 -; RV32I-NEXT: mv t3, s2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_126 +; RV32I-NEXT: .LBB19_113: +; RV32I-NEXT: beq t1, s0, .LBB19_127 ; RV32I-NEXT: .LBB19_114: -; RV32I-NEXT: bltu a5, a6, .LBB19_116 -; RV32I-NEXT: # %bb.115: -; RV32I-NEXT: srai a1, a4, 31 -; RV32I-NEXT: mv t0, a1 -; RV32I-NEXT: mv s11, a1 -; RV32I-NEXT: mv ra, a1 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s10, .LBB19_128 +; RV32I-NEXT: .LBB19_115: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s1, .LBB19_129 ; RV32I-NEXT: .LBB19_116: -; RV32I-NEXT: srli a4, s0, 16 -; RV32I-NEXT: lui t1, 16 -; RV32I-NEXT: srli a7, s0, 24 -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli t5, a0, 24 -; RV32I-NEXT: srli a6, t4, 16 -; RV32I-NEXT: srli s2, t4, 24 -; RV32I-NEXT: srli t2, t3, 16 -; RV32I-NEXT: srli s3, t3, 24 -; RV32I-NEXT: srli s1, a1, 16 -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: srli t6, t0, 16 -; RV32I-NEXT: srli s6, t0, 24 -; RV32I-NEXT: srli s5, s11, 16 -; RV32I-NEXT: srli s4, s11, 24 -; RV32I-NEXT: srli s7, ra, 16 -; RV32I-NEXT: srli s8, ra, 24 -; RV32I-NEXT: addi t1, t1, -1 -; RV32I-NEXT: and s9, s0, t1 -; RV32I-NEXT: and s10, a0, t1 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s8, .LBB19_130 +; RV32I-NEXT: .LBB19_117: +; RV32I-NEXT: bnez t0, .LBB19_131 +; RV32I-NEXT: .LBB19_118: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_132 +; RV32I-NEXT: .LBB19_119: +; RV32I-NEXT: beqz t1, .LBB19_133 +; RV32I-NEXT: .LBB19_120: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB19_134 +; RV32I-NEXT: .LBB19_121: +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_109 +; RV32I-NEXT: .LBB19_122: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s3, 
.LBB19_110 +; RV32I-NEXT: .LBB19_123: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_111 +; RV32I-NEXT: .LBB19_124: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s5, .LBB19_112 +; RV32I-NEXT: .LBB19_125: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_113 +; RV32I-NEXT: .LBB19_126: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s0, .LBB19_114 +; RV32I-NEXT: .LBB19_127: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s10, .LBB19_115 +; RV32I-NEXT: .LBB19_128: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s1, .LBB19_116 +; RV32I-NEXT: .LBB19_129: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s8, .LBB19_117 +; RV32I-NEXT: .LBB19_130: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB19_118 +; RV32I-NEXT: .LBB19_131: +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_119 +; RV32I-NEXT: .LBB19_132: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bnez t1, .LBB19_120 +; RV32I-NEXT: .LBB19_133: +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: .LBB19_134: +; RV32I-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_148 +; RV32I-NEXT: # %bb.135: +; RV32I-NEXT: beq t1, s2, .LBB19_149 +; RV32I-NEXT: .LBB19_136: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_150 +; RV32I-NEXT: .LBB19_137: +; RV32I-NEXT: beq t1, s3, .LBB19_151 +; RV32I-NEXT: .LBB19_138: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_152 +; RV32I-NEXT: .LBB19_139: +; RV32I-NEXT: beq t1, s5, .LBB19_153 +; RV32I-NEXT: .LBB19_140: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB19_154 +; RV32I-NEXT: .LBB19_141: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB19_155 +; RV32I-NEXT: .LBB19_142: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB19_156 +; RV32I-NEXT: .LBB19_143: +; RV32I-NEXT: mv a5, t2 +; 
RV32I-NEXT: bne t1, s8, .LBB19_157 +; RV32I-NEXT: .LBB19_144: +; RV32I-NEXT: bnez t0, .LBB19_158 +; RV32I-NEXT: .LBB19_145: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_159 +; RV32I-NEXT: .LBB19_146: +; RV32I-NEXT: beqz t1, .LBB19_160 +; RV32I-NEXT: .LBB19_147: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_161 +; RV32I-NEXT: j .LBB19_162 +; RV32I-NEXT: .LBB19_148: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s2, .LBB19_136 +; RV32I-NEXT: .LBB19_149: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_137 +; RV32I-NEXT: .LBB19_150: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s3, .LBB19_138 +; RV32I-NEXT: .LBB19_151: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_139 +; RV32I-NEXT: .LBB19_152: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s5, .LBB19_140 +; RV32I-NEXT: .LBB19_153: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB19_141 +; RV32I-NEXT: .LBB19_154: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB19_142 +; RV32I-NEXT: .LBB19_155: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB19_143 +; RV32I-NEXT: .LBB19_156: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s8, .LBB19_144 +; RV32I-NEXT: .LBB19_157: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB19_145 +; RV32I-NEXT: .LBB19_158: +; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_146 +; RV32I-NEXT: .LBB19_159: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t1, .LBB19_147 +; RV32I-NEXT: .LBB19_160: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_162 +; RV32I-NEXT: .LBB19_161: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: .LBB19_162: +; RV32I-NEXT: beq t1, s2, .LBB19_174 +; RV32I-NEXT: # %bb.163: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, 
.LBB19_175 +; RV32I-NEXT: .LBB19_164: +; RV32I-NEXT: beq t1, s3, .LBB19_176 +; RV32I-NEXT: .LBB19_165: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s5, .LBB19_177 +; RV32I-NEXT: .LBB19_166: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s0, .LBB19_178 +; RV32I-NEXT: .LBB19_167: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s10, .LBB19_179 +; RV32I-NEXT: .LBB19_168: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s1, .LBB19_180 +; RV32I-NEXT: .LBB19_169: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s8, .LBB19_181 +; RV32I-NEXT: .LBB19_170: +; RV32I-NEXT: bnez t0, .LBB19_182 +; RV32I-NEXT: .LBB19_171: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_183 +; RV32I-NEXT: .LBB19_172: +; RV32I-NEXT: beqz t1, .LBB19_184 +; RV32I-NEXT: .LBB19_173: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_185 +; RV32I-NEXT: j .LBB19_186 +; RV32I-NEXT: .LBB19_174: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_164 +; RV32I-NEXT: .LBB19_175: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s3, .LBB19_165 +; RV32I-NEXT: .LBB19_176: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s5, .LBB19_166 +; RV32I-NEXT: .LBB19_177: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s0, .LBB19_167 +; RV32I-NEXT: .LBB19_178: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s10, .LBB19_168 +; RV32I-NEXT: .LBB19_179: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s1, .LBB19_169 +; RV32I-NEXT: .LBB19_180: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s8, .LBB19_170 +; RV32I-NEXT: .LBB19_181: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB19_171 +; RV32I-NEXT: .LBB19_182: +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_172 +; RV32I-NEXT: .LBB19_183: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bnez t1, .LBB19_173 +; RV32I-NEXT: .LBB19_184: +; RV32I-NEXT: or a5, s9, 
a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_186 +; RV32I-NEXT: .LBB19_185: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: .LBB19_186: +; RV32I-NEXT: beq t1, s2, .LBB19_197 +; RV32I-NEXT: # %bb.187: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s3, .LBB19_198 +; RV32I-NEXT: .LBB19_188: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s5, .LBB19_199 +; RV32I-NEXT: .LBB19_189: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB19_200 +; RV32I-NEXT: .LBB19_190: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB19_201 +; RV32I-NEXT: .LBB19_191: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB19_202 +; RV32I-NEXT: .LBB19_192: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s8, .LBB19_203 +; RV32I-NEXT: .LBB19_193: +; RV32I-NEXT: bnez t0, .LBB19_204 +; RV32I-NEXT: .LBB19_194: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB19_205 +; RV32I-NEXT: .LBB19_195: +; RV32I-NEXT: beqz t1, .LBB19_206 +; RV32I-NEXT: .LBB19_196: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s2, .LBB19_207 +; RV32I-NEXT: j .LBB19_208 +; RV32I-NEXT: .LBB19_197: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s3, .LBB19_188 +; RV32I-NEXT: .LBB19_198: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s5, .LBB19_189 +; RV32I-NEXT: .LBB19_199: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB19_190 +; RV32I-NEXT: .LBB19_200: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB19_191 +; RV32I-NEXT: .LBB19_201: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB19_192 +; RV32I-NEXT: .LBB19_202: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s8, .LBB19_193 +; RV32I-NEXT: .LBB19_203: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB19_194 +; RV32I-NEXT: .LBB19_204: +; RV32I-NEXT: mv t3, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB19_195 +; RV32I-NEXT: .LBB19_205: +; 
RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bnez t1, .LBB19_196 +; RV32I-NEXT: .LBB19_206: +; RV32I-NEXT: or a3, s6, a3 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s2, .LBB19_208 +; RV32I-NEXT: .LBB19_207: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: .LBB19_208: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s3, .LBB19_217 +; RV32I-NEXT: # %bb.209: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s5, .LBB19_218 +; RV32I-NEXT: .LBB19_210: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB19_219 +; RV32I-NEXT: .LBB19_211: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB19_220 +; RV32I-NEXT: .LBB19_212: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB19_221 +; RV32I-NEXT: .LBB19_213: +; RV32I-NEXT: bne t1, s8, .LBB19_222 +; RV32I-NEXT: .LBB19_214: +; RV32I-NEXT: beqz t0, .LBB19_216 +; RV32I-NEXT: .LBB19_215: +; RV32I-NEXT: mv a1, t2 +; RV32I-NEXT: .LBB19_216: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: srli a5, ra, 16 +; RV32I-NEXT: lui t4, 16 +; RV32I-NEXT: srli t2, ra, 24 +; RV32I-NEXT: srli t0, s11, 16 +; RV32I-NEXT: srli t6, s11, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli s2, a4, 24 +; RV32I-NEXT: srli t5, a6, 16 +; RV32I-NEXT: srli s3, a6, 24 +; RV32I-NEXT: srli s1, a7, 16 +; RV32I-NEXT: srli a3, a7, 24 +; RV32I-NEXT: srli s0, a0, 16 +; RV32I-NEXT: srli s5, a0, 24 +; RV32I-NEXT: srli s4, t3, 16 +; RV32I-NEXT: srli s6, t3, 24 +; RV32I-NEXT: srli s7, a1, 16 +; RV32I-NEXT: srli s8, a1, 24 +; RV32I-NEXT: addi t4, t4, -1 +; RV32I-NEXT: and s9, ra, t4 +; RV32I-NEXT: and s10, s11, t4 ; RV32I-NEXT: srli s9, s9, 8 -; RV32I-NEXT: sb s0, 0(a2) +; RV32I-NEXT: sb ra, 0(a2) ; RV32I-NEXT: sb s9, 1(a2) -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb a7, 3(a2) -; RV32I-NEXT: and a4, t4, t1 -; RV32I-NEXT: srli a7, s10, 8 -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: sb a7, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t5, 7(a2) -; RV32I-NEXT: and a0, t3, t1 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t4, 8(a2) -; RV32I-NEXT: sb 
a4, 9(a2) -; RV32I-NEXT: sb a6, 10(a2) +; RV32I-NEXT: sb a5, 2(a2) +; RV32I-NEXT: sb t2, 3(a2) +; RV32I-NEXT: and a5, a4, t4 +; RV32I-NEXT: srli t2, s10, 8 +; RV32I-NEXT: sb s11, 4(a2) +; RV32I-NEXT: sb t2, 5(a2) +; RV32I-NEXT: sb t0, 6(a2) +; RV32I-NEXT: sb t6, 7(a2) +; RV32I-NEXT: and t0, a6, t4 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb t1, 10(a2) ; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, a1, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb t3, 12(a2) -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb t2, 14(a2) +; RV32I-NEXT: and a4, a7, t4 +; RV32I-NEXT: srli a5, t0, 8 +; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb a5, 13(a2) +; RV32I-NEXT: sb t5, 14(a2) ; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a0, t0, t1 +; RV32I-NEXT: and a5, a0, t4 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a1, 16(a2) +; RV32I-NEXT: sb a7, 16(a2) ; RV32I-NEXT: sb a4, 17(a2) ; RV32I-NEXT: sb s1, 18(a2) ; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a1, s11, t1 -; RV32I-NEXT: and a3, ra, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: and a3, t3, t4 +; RV32I-NEXT: and a4, a1, t4 +; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb t0, 20(a2) -; RV32I-NEXT: sb a0, 21(a2) -; RV32I-NEXT: sb t6, 22(a2) -; RV32I-NEXT: sb s6, 23(a2) -; RV32I-NEXT: sb s11, 24(a2) -; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s5, 26(a2) -; RV32I-NEXT: sb s4, 27(a2) -; RV32I-NEXT: sb ra, 28(a2) -; RV32I-NEXT: sb a3, 29(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a0, 20(a2) +; RV32I-NEXT: sb a5, 21(a2) +; RV32I-NEXT: sb s0, 22(a2) +; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb t3, 24(a2) +; RV32I-NEXT: sb a3, 25(a2) +; RV32I-NEXT: sb s4, 26(a2) +; RV32I-NEXT: sb s6, 27(a2) +; RV32I-NEXT: sb a1, 28(a2) +; RV32I-NEXT: sb a4, 29(a2) ; RV32I-NEXT: sb s7, 30(a2) ; RV32I-NEXT: sb s8, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded 
Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB19_117: -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: mv a7, t5 -; RV32I-NEXT: bltu ra, t0, .LBB19_110 -; RV32I-NEXT: .LBB19_118: -; RV32I-NEXT: srai s1, a4, 31 -; RV32I-NEXT: mv s2, s1 -; RV32I-NEXT: bltu a5, a6, .LBB19_111 -; RV32I-NEXT: j .LBB19_112 +; RV32I-NEXT: .LBB19_217: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s5, .LBB19_210 +; RV32I-NEXT: .LBB19_218: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB19_211 +; RV32I-NEXT: .LBB19_219: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB19_212 +; RV32I-NEXT: .LBB19_220: +; RV32I-NEXT: mv a5, 
a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB19_213 +; RV32I-NEXT: .LBB19_221: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beq t1, s8, .LBB19_214 +; RV32I-NEXT: .LBB19_222: +; RV32I-NEXT: mv t2, a3 +; RV32I-NEXT: bnez t0, .LBB19_215 +; RV32I-NEXT: j .LBB19_216 %src = load i256, ptr %src.ptr, align 1 %wordOff = load i256, ptr %wordOff.ptr, align 1 %bitOff = shl i256 %wordOff, 5 @@ -10362,617 +12214,893 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; ; RV32I-LABEL: ashr_32bytes_dwordOff: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -96 -; RV32I-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu a3, 16(a0) -; RV32I-NEXT: lbu a4, 17(a0) -; RV32I-NEXT: lbu a5, 18(a0) -; RV32I-NEXT: lbu a6, 19(a0) -; RV32I-NEXT: lbu a7, 20(a0) -; RV32I-NEXT: lbu t0, 21(a0) -; RV32I-NEXT: lbu t1, 22(a0) -; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: addi sp, sp, -80 +; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 68(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 64(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 60(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 56(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 52(sp) # 4-byte Folded 
Spill +; RV32I-NEXT: sw s6, 48(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu t0, 2(a0) +; RV32I-NEXT: lbu t1, 3(a0) +; RV32I-NEXT: lbu t2, 4(a0) +; RV32I-NEXT: lbu t3, 5(a0) +; RV32I-NEXT: lbu t4, 6(a0) +; RV32I-NEXT: lbu t5, 7(a0) +; RV32I-NEXT: lbu t6, 8(a0) +; RV32I-NEXT: lbu s0, 9(a0) +; RV32I-NEXT: lbu s1, 10(a0) +; RV32I-NEXT: lbu s2, 11(a0) ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t3, t4, t3 -; RV32I-NEXT: or t4, t6, t5 -; RV32I-NEXT: or t0, t0, a7 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t5, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: or a4, a4, a3 +; RV32I-NEXT: lbu a7, 13(a0) +; RV32I-NEXT: lbu a6, 14(a0) +; RV32I-NEXT: lbu a3, 15(a0) +; RV32I-NEXT: lbu s3, 28(a0) +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: lbu t2, 29(a0) +; RV32I-NEXT: lbu t3, 30(a0) +; RV32I-NEXT: lbu t5, 31(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: slli s4, t2, 8 +; RV32I-NEXT: or t2, s0, t6 +; RV32I-NEXT: or s0, s2, s1 +; RV32I-NEXT: or s1, s4, s3 +; RV32I-NEXT: lbu t6, 0(a1) +; RV32I-NEXT: lbu s2, 1(a1) +; RV32I-NEXT: lbu s3, 2(a1) ; 
RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s0, t5, a7 +; RV32I-NEXT: or s4, t5, t3 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or s2, s2, t6 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 -; RV32I-NEXT: li t5, 32 -; RV32I-NEXT: slli a7, a4, 16 -; RV32I-NEXT: slli a1, a5, 16 +; RV32I-NEXT: or a1, a1, s3 +; RV32I-NEXT: slli t3, a3, 8 +; RV32I-NEXT: slli t6, t0, 16 ; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t1, t1, 16 -; RV32I-NEXT: slli a5, t2, 16 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or a4, t1, t0 -; RV32I-NEXT: or a5, a5, s0 -; RV32I-NEXT: slli a5, a5, 6 -; RV32I-NEXT: srl s0, t2, a5 -; RV32I-NEXT: neg s6, a5 -; RV32I-NEXT: sll s1, a4, s6 -; RV32I-NEXT: bltu a5, t5, .LBB20_2 +; RV32I-NEXT: slli t5, s0, 16 +; RV32I-NEXT: slli s4, s4, 16 +; RV32I-NEXT: slli a3, a1, 16 +; RV32I-NEXT: or s5, t4, t1 +; RV32I-NEXT: or a1, s4, s1 +; RV32I-NEXT: or t0, a3, s2 +; RV32I-NEXT: slli t0, t0, 6 +; RV32I-NEXT: srli t1, t0, 5 +; RV32I-NEXT: andi t4, t0, 31 +; RV32I-NEXT: neg a3, t4 +; RV32I-NEXT: beqz t4, .LBB20_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra t0, a4, a5 -; RV32I-NEXT: j .LBB20_3 +; RV32I-NEXT: sll a5, s5, a3 ; RV32I-NEXT: .LBB20_2: -; RV32I-NEXT: or t0, s0, s1 -; RV32I-NEXT: .LBB20_3: -; RV32I-NEXT: or t1, a7, a3 -; RV32I-NEXT: or a7, a1, a6 -; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beqz a5, .LBB20_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv t3, t0 +; RV32I-NEXT: or s10, t6, a4 +; RV32I-NEXT: lbu t6, 12(a0) +; RV32I-NEXT: lbu s0, 19(a0) +; RV32I-NEXT: slli s1, a7, 8 +; RV32I-NEXT: or a6, t3, a6 +; RV32I-NEXT: or a4, t5, t2 +; RV32I-NEXT: srai t2, a1, 31 +; RV32I-NEXT: beqz t1, .LBB20_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB20_5 +; RV32I-NEXT: .LBB20_4: +; RV32I-NEXT: srl a7, s10, t0 +; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: .LBB20_5: -; RV32I-NEXT: srl a3, t1, a5 -; RV32I-NEXT: sll a1, a7, s6 -; RV32I-NEXT: sw a1, 12(sp) # 
4-byte Folded Spill -; RV32I-NEXT: bltu a5, t5, .LBB20_7 +; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: lbu s3, 17(a0) +; RV32I-NEXT: lbu t3, 18(a0) +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: or s4, s1, t6 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: li s6, 1 +; RV32I-NEXT: sll s2, a4, a3 +; RV32I-NEXT: beqz t4, .LBB20_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: srai t4, a4, 31 -; RV32I-NEXT: srl a1, a7, a5 -; RV32I-NEXT: j .LBB20_8 +; RV32I-NEXT: mv a7, s2 ; RV32I-NEXT: .LBB20_7: -; RV32I-NEXT: sra t4, a4, a5 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: .LBB20_8: -; RV32I-NEXT: li t6, 64 -; RV32I-NEXT: mv t0, t1 -; RV32I-NEXT: beqz a5, .LBB20_10 -; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t0, a1 -; RV32I-NEXT: .LBB20_10: -; RV32I-NEXT: sub s7, t6, a5 -; RV32I-NEXT: bltu a5, t5, .LBB20_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: j .LBB20_13 -; RV32I-NEXT: .LBB20_12: -; RV32I-NEXT: srl a1, a7, a5 +; RV32I-NEXT: lbu t5, 16(a0) +; RV32I-NEXT: lbu t6, 23(a0) +; RV32I-NEXT: slli s1, s3, 8 +; RV32I-NEXT: or s0, s0, t3 +; RV32I-NEXT: srl s3, s5, t0 +; RV32I-NEXT: or a6, a6, s4 +; RV32I-NEXT: bne t1, s6, .LBB20_9 +; RV32I-NEXT: # %bb.8: +; RV32I-NEXT: or a5, s3, a7 +; RV32I-NEXT: .LBB20_9: +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: lbu s6, 21(a0) +; RV32I-NEXT: lbu a7, 22(a0) +; RV32I-NEXT: slli s4, t6, 8 +; RV32I-NEXT: or s7, s1, t5 +; RV32I-NEXT: slli s8, s0, 16 +; RV32I-NEXT: li s9, 2 +; RV32I-NEXT: sll s0, a6, a3 +; RV32I-NEXT: beqz t4, .LBB20_11 +; RV32I-NEXT: # %bb.10: +; RV32I-NEXT: mv t3, s0 +; RV32I-NEXT: .LBB20_11: +; RV32I-NEXT: lbu t5, 20(a0) +; RV32I-NEXT: lbu t6, 27(a0) +; RV32I-NEXT: slli s6, s6, 8 +; RV32I-NEXT: or s4, s4, a7 +; RV32I-NEXT: srl s1, a4, t0 +; RV32I-NEXT: or a7, s8, s7 +; RV32I-NEXT: bne t1, s9, .LBB20_13 +; RV32I-NEXT: # %bb.12: +; RV32I-NEXT: or a5, s1, t3 ; RV32I-NEXT: .LBB20_13: -; RV32I-NEXT: sw a3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: neg s10, s7 -; RV32I-NEXT: bltu s7, t5, .LBB20_15 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: 
lbu s8, 25(a0) +; RV32I-NEXT: lbu s7, 26(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: or s6, s6, t5 +; RV32I-NEXT: slli s9, s4, 16 +; RV32I-NEXT: li s11, 3 +; RV32I-NEXT: sll t5, a7, a3 +; RV32I-NEXT: beqz t4, .LBB20_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: li a6, 0 -; RV32I-NEXT: sll a3, t2, s7 -; RV32I-NEXT: j .LBB20_16 +; RV32I-NEXT: mv t3, t5 ; RV32I-NEXT: .LBB20_15: -; RV32I-NEXT: sll a6, t2, s6 -; RV32I-NEXT: srl a3, t2, s10 -; RV32I-NEXT: or a3, a3, s1 -; RV32I-NEXT: .LBB20_16: -; RV32I-NEXT: sw t3, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi s9, a5, -64 -; RV32I-NEXT: mv t3, a4 -; RV32I-NEXT: beqz s7, .LBB20_18 -; RV32I-NEXT: # %bb.17: -; RV32I-NEXT: mv t3, a3 -; RV32I-NEXT: .LBB20_18: -; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s9, t5, .LBB20_20 -; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: sra s0, a4, s9 -; RV32I-NEXT: j .LBB20_21 -; RV32I-NEXT: .LBB20_20: -; RV32I-NEXT: sll a3, a4, s11 -; RV32I-NEXT: or s0, s0, a3 +; RV32I-NEXT: lbu s4, 24(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or s7, t6, s7 +; RV32I-NEXT: srl t6, a6, t0 +; RV32I-NEXT: or a0, s9, s6 +; RV32I-NEXT: sw s5, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s10, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: bne t1, s11, .LBB20_17 +; RV32I-NEXT: # %bb.16: +; RV32I-NEXT: or a5, t6, t3 +; RV32I-NEXT: .LBB20_17: +; RV32I-NEXT: li s6, 0 +; RV32I-NEXT: or t3, s8, s4 +; RV32I-NEXT: slli s7, s7, 16 +; RV32I-NEXT: li s10, 4 +; RV32I-NEXT: sll s11, a0, a3 +; RV32I-NEXT: beqz t4, .LBB20_19 +; RV32I-NEXT: # %bb.18: +; RV32I-NEXT: mv s6, s11 +; RV32I-NEXT: .LBB20_19: +; RV32I-NEXT: srl s4, a7, t0 +; RV32I-NEXT: or t3, s7, t3 +; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: bne t1, s10, .LBB20_21 +; RV32I-NEXT: # %bb.20: +; RV32I-NEXT: or a5, s4, s6 ; RV32I-NEXT: .LBB20_21: -; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s3, 11(a0) -; RV32I-NEXT: lbu a3, 
15(a0) -; RV32I-NEXT: mv t4, t2 -; RV32I-NEXT: beqz s9, .LBB20_23 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s5, 5 +; RV32I-NEXT: sll s6, t3, a3 +; RV32I-NEXT: sw s6, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: beqz t4, .LBB20_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv t4, s0 +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB20_23: -; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) -; RV32I-NEXT: lbu s8, 13(a0) -; RV32I-NEXT: lbu ra, 14(a0) -; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: bltu s9, t5, .LBB20_25 +; RV32I-NEXT: srl s6, a0, t0 +; RV32I-NEXT: beq t1, s5, .LBB20_25 ; RV32I-NEXT: # %bb.24: -; RV32I-NEXT: srai s0, a4, 31 +; RV32I-NEXT: mv ra, s6 ; RV32I-NEXT: j .LBB20_26 ; RV32I-NEXT: .LBB20_25: -; RV32I-NEXT: sra s0, a4, a5 +; RV32I-NEXT: mv ra, s6 +; RV32I-NEXT: or a5, s6, s4 ; RV32I-NEXT: .LBB20_26: -; RV32I-NEXT: or s1, s3, s1 -; RV32I-NEXT: lbu s5, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, s8, 8 -; RV32I-NEXT: or s8, a3, ra -; RV32I-NEXT: bgeu a5, t6, .LBB20_28 +; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: li s8, 6 +; RV32I-NEXT: sll s7, a1, a3 +; RV32I-NEXT: beqz t4, .LBB20_28 ; RV32I-NEXT: # %bb.27: -; RV32I-NEXT: or t4, t0, a6 -; RV32I-NEXT: or s0, a1, t3 +; RV32I-NEXT: mv s4, s7 ; RV32I-NEXT: .LBB20_28: -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu t3, 7(a0) -; RV32I-NEXT: or a6, s2, s5 -; RV32I-NEXT: slli s2, s1, 16 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: slli s8, s8, 16 -; RV32I-NEXT: mv a1, t1 -; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: beqz a5, .LBB20_30 +; RV32I-NEXT: srl s5, t3, t0 +; RV32I-NEXT: beq t1, s8, .LBB20_30 ; RV32I-NEXT: # %bb.29: -; RV32I-NEXT: mv a1, t4 -; RV32I-NEXT: mv t0, s0 +; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: j .LBB20_31 ; RV32I-NEXT: .LBB20_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; RV32I-NEXT: lbu a3, 2(a0) -; RV32I-NEXT: lbu s3, 5(a0) -; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s4, t3, 8 -; 
RV32I-NEXT: or t4, s2, a6 -; RV32I-NEXT: or t3, s8, s1 -; RV32I-NEXT: bltu a5, t6, .LBB20_32 -; RV32I-NEXT: # %bb.31: -; RV32I-NEXT: srai a6, a4, 31 -; RV32I-NEXT: sw a6, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw a6, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: .LBB20_32: -; RV32I-NEXT: slli a6, ra, 8 -; RV32I-NEXT: or a3, s5, a3 -; RV32I-NEXT: lbu s1, 0(a0) -; RV32I-NEXT: lbu a0, 4(a0) -; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: or s0, s4, s0 -; RV32I-NEXT: srl s2, t4, a5 -; RV32I-NEXT: sll ra, t3, s6 -; RV32I-NEXT: bltu a5, t5, .LBB20_34 -; RV32I-NEXT: # %bb.33: -; RV32I-NEXT: srl s4, t3, a5 -; RV32I-NEXT: j .LBB20_35 -; RV32I-NEXT: .LBB20_34: -; RV32I-NEXT: or s4, s2, ra +; RV32I-NEXT: mv s9, s5 +; RV32I-NEXT: or a5, s5, s4 +; RV32I-NEXT: .LBB20_31: +; RV32I-NEXT: li s5, 0 +; RV32I-NEXT: li s6, 7 +; RV32I-NEXT: sll s4, t2, a3 +; RV32I-NEXT: beqz t4, .LBB20_33 +; RV32I-NEXT: # %bb.32: +; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: .LBB20_33: +; RV32I-NEXT: srl a3, a1, t0 +; RV32I-NEXT: bne t1, s6, .LBB20_35 +; RV32I-NEXT: # %bb.34: +; RV32I-NEXT: or a5, a3, s5 ; RV32I-NEXT: .LBB20_35: -; RV32I-NEXT: or a6, a6, s1 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: or a0, s3, a0 -; RV32I-NEXT: slli s1, s0, 16 -; RV32I-NEXT: mv s5, t4 -; RV32I-NEXT: beqz a5, .LBB20_37 +; RV32I-NEXT: li s5, 3 +; RV32I-NEXT: mv s6, a3 +; RV32I-NEXT: bnez t0, .LBB20_39 ; RV32I-NEXT: # %bb.36: -; RV32I-NEXT: mv s5, s4 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_40 ; RV32I-NEXT: .LBB20_37: -; RV32I-NEXT: or s0, a3, a6 -; RV32I-NEXT: or a0, s1, a0 -; RV32I-NEXT: bltu a5, t5, .LBB20_39 -; RV32I-NEXT: # %bb.38: -; RV32I-NEXT: li s4, 0 -; RV32I-NEXT: srl a3, a0, a5 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: bnez a5, .LBB20_40 -; RV32I-NEXT: j .LBB20_41 +; RV32I-NEXT: beqz t1, .LBB20_41 +; RV32I-NEXT: .LBB20_38: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB20_42 ; RV32I-NEXT: .LBB20_39: -; RV32I-NEXT: srl s4, t3, a5 -; RV32I-NEXT: srl a3, s0, a5 -; RV32I-NEXT: sll a6, a0, s6 -; RV32I-NEXT: 
or a3, a3, a6 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: beqz a5, .LBB20_41 +; RV32I-NEXT: sw a5, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_37 ; RV32I-NEXT: .LBB20_40: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: bnez t1, .LBB20_38 ; RV32I-NEXT: .LBB20_41: -; RV32I-NEXT: bltu a5, t5, .LBB20_44 -; RV32I-NEXT: # %bb.42: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: bgeu s7, t5, .LBB20_45 -; RV32I-NEXT: .LBB20_43: -; RV32I-NEXT: sll s3, t4, s6 -; RV32I-NEXT: srl a3, t4, s10 -; RV32I-NEXT: or a3, a3, ra -; RV32I-NEXT: mv s10, t3 -; RV32I-NEXT: bnez s7, .LBB20_46 -; RV32I-NEXT: j .LBB20_47 +; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: .LBB20_42: +; RV32I-NEXT: li s2, 1 +; RV32I-NEXT: li s3, 2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_61 +; RV32I-NEXT: # %bb.43: +; RV32I-NEXT: beq t1, s2, .LBB20_62 ; RV32I-NEXT: .LBB20_44: -; RV32I-NEXT: srl s1, a0, a5 -; RV32I-NEXT: bltu s7, t5, .LBB20_43 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_63 ; RV32I-NEXT: .LBB20_45: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t4, s7 -; RV32I-NEXT: mv s10, t3 -; RV32I-NEXT: beqz s7, .LBB20_47 +; RV32I-NEXT: beq t1, s3, .LBB20_64 ; RV32I-NEXT: .LBB20_46: -; RV32I-NEXT: mv s10, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_65 ; RV32I-NEXT: .LBB20_47: -; RV32I-NEXT: bltu s9, t5, .LBB20_49 -; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: srl a3, t3, s9 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: bnez s9, .LBB20_50 -; RV32I-NEXT: j .LBB20_51 +; RV32I-NEXT: beq t1, s5, .LBB20_66 +; RV32I-NEXT: .LBB20_48: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_67 ; RV32I-NEXT: .LBB20_49: -; RV32I-NEXT: sll a3, t3, s11 -; RV32I-NEXT: or a3, s2, a3 -; RV32I-NEXT: mv s2, t4 -; RV32I-NEXT: beqz s9, .LBB20_51 +; RV32I-NEXT: bne t1, s10, .LBB20_51 ; RV32I-NEXT: .LBB20_50: -; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: or a5, ra, a3 ; RV32I-NEXT: .LBB20_51: -; RV32I-NEXT: bltu s9, t5, .LBB20_53 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li s10, 
5 +; RV32I-NEXT: bnez t4, .LBB20_68 ; RV32I-NEXT: # %bb.52: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: bltu a5, t6, .LBB20_54 -; RV32I-NEXT: j .LBB20_55 +; RV32I-NEXT: beq t1, s10, .LBB20_69 ; RV32I-NEXT: .LBB20_53: -; RV32I-NEXT: srl s7, t3, a5 -; RV32I-NEXT: bgeu a5, t6, .LBB20_55 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_70 ; RV32I-NEXT: .LBB20_54: -; RV32I-NEXT: or s2, a6, s3 -; RV32I-NEXT: or s7, s1, s10 +; RV32I-NEXT: bne t1, s8, .LBB20_56 ; RV32I-NEXT: .LBB20_55: -; RV32I-NEXT: li a3, 128 -; RV32I-NEXT: mv a6, s0 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: beqz a5, .LBB20_57 -; RV32I-NEXT: # %bb.56: -; RV32I-NEXT: mv a6, s2 -; RV32I-NEXT: mv s1, s7 -; RV32I-NEXT: .LBB20_57: -; RV32I-NEXT: sw a6, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sub s2, a3, a5 -; RV32I-NEXT: bltu a5, t6, .LBB20_59 -; RV32I-NEXT: # %bb.58: -; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: li s4, 0 +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: .LBB20_56: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: li s8, 7 +; RV32I-NEXT: bne t1, s8, .LBB20_71 +; RV32I-NEXT: # %bb.57: +; RV32I-NEXT: bnez t0, .LBB20_72 +; RV32I-NEXT: .LBB20_58: +; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: bnez t4, .LBB20_73 ; RV32I-NEXT: .LBB20_59: -; RV32I-NEXT: neg s3, s2 -; RV32I-NEXT: srl a6, t1, s3 -; RV32I-NEXT: sw s4, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t5, .LBB20_61 -; RV32I-NEXT: # %bb.60: -; RV32I-NEXT: li s11, 0 -; RV32I-NEXT: sll a3, t1, s2 -; RV32I-NEXT: j .LBB20_62 +; RV32I-NEXT: beqz t1, .LBB20_74 +; RV32I-NEXT: .LBB20_60: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB20_75 ; RV32I-NEXT: .LBB20_61: -; RV32I-NEXT: sll s11, t1, s6 -; RV32I-NEXT: lw a3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: bne t1, s2, .LBB20_44 ; RV32I-NEXT: .LBB20_62: -; RV32I-NEXT: sub s1, t6, s2 -; RV32I-NEXT: mv s8, a7 -; RV32I-NEXT: beqz s2, .LBB20_64 -; RV32I-NEXT: # %bb.63: -; RV32I-NEXT: mv s8, a3 +; RV32I-NEXT: or a5, s1, 
a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_45 +; RV32I-NEXT: .LBB20_63: +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne t1, s3, .LBB20_46 ; RV32I-NEXT: .LBB20_64: -; RV32I-NEXT: bltu s1, t5, .LBB20_66 -; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: srl a3, a7, s1 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: bnez s1, .LBB20_67 -; RV32I-NEXT: j .LBB20_68 +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_47 +; RV32I-NEXT: .LBB20_65: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne t1, s5, .LBB20_48 ; RV32I-NEXT: .LBB20_66: -; RV32I-NEXT: neg a3, s1 -; RV32I-NEXT: sll a3, a7, a3 -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: beqz s1, .LBB20_68 +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_49 ; RV32I-NEXT: .LBB20_67: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: beq t1, s10, .LBB20_50 +; RV32I-NEXT: j .LBB20_51 ; RV32I-NEXT: .LBB20_68: -; RV32I-NEXT: bltu s1, t5, .LBB20_71 -; RV32I-NEXT: # %bb.69: -; RV32I-NEXT: li s1, 0 -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s2, t5, .LBB20_72 +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s10, .LBB20_53 +; RV32I-NEXT: .LBB20_69: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_54 ; RV32I-NEXT: .LBB20_70: -; RV32I-NEXT: sll s6, t2, s6 -; RV32I-NEXT: srl a3, t2, s3 -; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: j .LBB20_73 +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: beq t1, s8, .LBB20_55 +; RV32I-NEXT: j .LBB20_56 ; RV32I-NEXT: .LBB20_71: -; RV32I-NEXT: srl s1, a7, s3 -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: bltu s2, t5, .LBB20_70 +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB20_58 ; RV32I-NEXT: .LBB20_72: -; RV32I-NEXT: li s6, 0 -; RV32I-NEXT: sll a3, t2, s2 +; RV32I-NEXT: sw a3, 16(sp) # 4-byte Folded Spill +; 
RV32I-NEXT: li a5, 0 +; RV32I-NEXT: beqz t4, .LBB20_59 ; RV32I-NEXT: .LBB20_73: -; RV32I-NEXT: addi s9, s2, -64 -; RV32I-NEXT: mv s5, a4 -; RV32I-NEXT: beqz s2, .LBB20_75 -; RV32I-NEXT: # %bb.74: -; RV32I-NEXT: mv s5, a3 +; RV32I-NEXT: mv a5, s0 +; RV32I-NEXT: bnez t1, .LBB20_60 +; RV32I-NEXT: .LBB20_74: +; RV32I-NEXT: or a5, s1, a5 ; RV32I-NEXT: .LBB20_75: -; RV32I-NEXT: bltu s9, t5, .LBB20_77 +; RV32I-NEXT: li s0, 4 +; RV32I-NEXT: li s1, 6 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_91 ; RV32I-NEXT: # %bb.76: -; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: sll a3, t1, s9 -; RV32I-NEXT: mv s7, a7 -; RV32I-NEXT: bnez s9, .LBB20_78 -; RV32I-NEXT: j .LBB20_79 +; RV32I-NEXT: beq t1, s2, .LBB20_92 ; RV32I-NEXT: .LBB20_77: -; RV32I-NEXT: sll s3, t1, s2 -; RV32I-NEXT: neg a3, s9 -; RV32I-NEXT: srl a3, t1, a3 -; RV32I-NEXT: sll s4, a7, s2 -; RV32I-NEXT: or a3, a3, s4 -; RV32I-NEXT: mv s7, a7 -; RV32I-NEXT: beqz s9, .LBB20_79 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_93 ; RV32I-NEXT: .LBB20_78: -; RV32I-NEXT: mv s7, a3 +; RV32I-NEXT: beq t1, s3, .LBB20_94 ; RV32I-NEXT: .LBB20_79: -; RV32I-NEXT: bltu s2, t6, .LBB20_81 -; RV32I-NEXT: # %bb.80: -; RV32I-NEXT: li s11, 0 -; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: j .LBB20_82 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_95 +; RV32I-NEXT: .LBB20_80: +; RV32I-NEXT: beq t1, s5, .LBB20_96 ; RV32I-NEXT: .LBB20_81: -; RV32I-NEXT: or s3, a6, s6 -; RV32I-NEXT: or s7, s1, s5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_97 ; RV32I-NEXT: .LBB20_82: -; RV32I-NEXT: addi ra, a5, -128 -; RV32I-NEXT: mv s4, t2 -; RV32I-NEXT: mv s6, a4 -; RV32I-NEXT: beqz s2, .LBB20_84 -; RV32I-NEXT: # %bb.83: -; RV32I-NEXT: mv s4, s3 -; RV32I-NEXT: mv s6, s7 +; RV32I-NEXT: beq t1, s0, .LBB20_98 +; RV32I-NEXT: .LBB20_83: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_99 ; RV32I-NEXT: .LBB20_84: -; RV32I-NEXT: neg s9, ra -; RV32I-NEXT: sll s3, a4, s9 -; RV32I-NEXT: bltu ra, t5, .LBB20_86 -; RV32I-NEXT: # %bb.85: -; 
RV32I-NEXT: sra a3, a4, ra -; RV32I-NEXT: mv s1, t2 -; RV32I-NEXT: bnez ra, .LBB20_87 -; RV32I-NEXT: j .LBB20_88 +; RV32I-NEXT: beq t1, s10, .LBB20_100 +; RV32I-NEXT: .LBB20_85: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB20_101 ; RV32I-NEXT: .LBB20_86: -; RV32I-NEXT: lw a3, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a3, s3 -; RV32I-NEXT: mv s1, t2 -; RV32I-NEXT: beqz ra, .LBB20_88 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s8, .LBB20_102 ; RV32I-NEXT: .LBB20_87: -; RV32I-NEXT: mv s1, a3 +; RV32I-NEXT: bnez t0, .LBB20_103 ; RV32I-NEXT: .LBB20_88: -; RV32I-NEXT: bltu ra, t5, .LBB20_90 -; RV32I-NEXT: # %bb.89: -; RV32I-NEXT: srai s2, a4, 31 -; RV32I-NEXT: srl a3, a7, ra -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: bnez ra, .LBB20_91 -; RV32I-NEXT: j .LBB20_92 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_104 +; RV32I-NEXT: .LBB20_89: +; RV32I-NEXT: beqz t1, .LBB20_105 ; RV32I-NEXT: .LBB20_90: -; RV32I-NEXT: sra s2, a4, a5 -; RV32I-NEXT: sll a3, a7, s9 -; RV32I-NEXT: lw a6, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a3, a6, a3 -; RV32I-NEXT: mv a6, t1 -; RV32I-NEXT: beqz ra, .LBB20_92 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_106 +; RV32I-NEXT: j .LBB20_107 ; RV32I-NEXT: .LBB20_91: -; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bne t1, s2, .LBB20_77 ; RV32I-NEXT: .LBB20_92: -; RV32I-NEXT: mv s5, t0 -; RV32I-NEXT: sub s10, t6, ra -; RV32I-NEXT: li t0, 64 -; RV32I-NEXT: bltu ra, t5, .LBB20_94 -; RV32I-NEXT: # %bb.93: -; RV32I-NEXT: li s7, 0 -; RV32I-NEXT: j .LBB20_95 +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_78 +; RV32I-NEXT: .LBB20_93: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bne t1, s3, .LBB20_79 ; RV32I-NEXT: .LBB20_94: -; RV32I-NEXT: srl s7, a7, a5 +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_80 ; RV32I-NEXT: .LBB20_95: -; RV32I-NEXT: mv t6, s8 
-; RV32I-NEXT: mv s8, s11 -; RV32I-NEXT: bltu s10, t5, .LBB20_97 -; RV32I-NEXT: # %bb.96: -; RV32I-NEXT: li s9, 0 -; RV32I-NEXT: sll a3, t2, s10 -; RV32I-NEXT: j .LBB20_98 +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s5, .LBB20_81 +; RV32I-NEXT: .LBB20_96: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_82 ; RV32I-NEXT: .LBB20_97: -; RV32I-NEXT: sll s9, t2, s9 -; RV32I-NEXT: neg a3, s10 -; RV32I-NEXT: srl a3, t2, a3 -; RV32I-NEXT: or a3, a3, s3 +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s0, .LBB20_83 ; RV32I-NEXT: .LBB20_98: -; RV32I-NEXT: addi s11, ra, -64 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: beqz s10, .LBB20_100 -; RV32I-NEXT: # %bb.99: -; RV32I-NEXT: mv s3, a3 +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_84 +; RV32I-NEXT: .LBB20_99: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s10, .LBB20_85 ; RV32I-NEXT: .LBB20_100: -; RV32I-NEXT: bltu s11, t5, .LBB20_102 -; RV32I-NEXT: # %bb.101: -; RV32I-NEXT: sra a3, a4, s11 -; RV32I-NEXT: bnez s11, .LBB20_103 -; RV32I-NEXT: j .LBB20_104 +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB20_86 +; RV32I-NEXT: .LBB20_101: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s8, .LBB20_87 ; RV32I-NEXT: .LBB20_102: -; RV32I-NEXT: srl a3, t2, ra -; RV32I-NEXT: mv s10, s4 -; RV32I-NEXT: neg s4, s11 -; RV32I-NEXT: sll s4, a4, s4 -; RV32I-NEXT: or a3, a3, s4 -; RV32I-NEXT: mv s4, s10 -; RV32I-NEXT: beqz s11, .LBB20_104 +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB20_88 ; RV32I-NEXT: .LBB20_103: -; RV32I-NEXT: mv t2, a3 +; RV32I-NEXT: mv a4, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_89 ; RV32I-NEXT: .LBB20_104: -; RV32I-NEXT: bltu s11, t5, .LBB20_106 -; RV32I-NEXT: # %bb.105: -; RV32I-NEXT: srai t5, a4, 31 -; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bltu ra, t0, .LBB20_107 -; RV32I-NEXT: j .LBB20_108 +; RV32I-NEXT: mv 
a3, t5 +; RV32I-NEXT: bnez t1, .LBB20_90 +; RV32I-NEXT: .LBB20_105: +; RV32I-NEXT: or a5, t6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_107 ; RV32I-NEXT: .LBB20_106: -; RV32I-NEXT: sra t5, a4, ra -; RV32I-NEXT: lw s11, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: bgeu ra, t0, .LBB20_108 +; RV32I-NEXT: mv a3, s11 ; RV32I-NEXT: .LBB20_107: -; RV32I-NEXT: or t2, a6, s9 -; RV32I-NEXT: or t5, s7, s3 -; RV32I-NEXT: .LBB20_108: -; RV32I-NEXT: li a6, 128 -; RV32I-NEXT: bnez ra, .LBB20_117 -; RV32I-NEXT: # %bb.109: -; RV32I-NEXT: bgeu ra, t0, .LBB20_118 +; RV32I-NEXT: beq t1, s2, .LBB20_121 +; RV32I-NEXT: # %bb.108: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_122 +; RV32I-NEXT: .LBB20_109: +; RV32I-NEXT: beq t1, s3, .LBB20_123 ; RV32I-NEXT: .LBB20_110: -; RV32I-NEXT: bgeu a5, a6, .LBB20_112 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_124 ; RV32I-NEXT: .LBB20_111: -; RV32I-NEXT: lw a3, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: or t1, a3, s8 -; RV32I-NEXT: lw a3, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a7, a3, t6 -; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s1, a3, s4 -; RV32I-NEXT: lw a3, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s2, a3, s6 +; RV32I-NEXT: beq t1, s5, .LBB20_125 ; RV32I-NEXT: .LBB20_112: -; RV32I-NEXT: lw ra, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: mv t0, s5 -; RV32I-NEXT: beqz a5, .LBB20_114 -; RV32I-NEXT: # %bb.113: -; RV32I-NEXT: mv s0, t1 -; RV32I-NEXT: mv a0, a7 -; RV32I-NEXT: mv t4, s1 -; RV32I-NEXT: mv t3, s2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_126 +; RV32I-NEXT: .LBB20_113: +; RV32I-NEXT: beq t1, s0, .LBB20_127 ; RV32I-NEXT: .LBB20_114: -; RV32I-NEXT: bltu a5, a6, .LBB20_116 -; RV32I-NEXT: # %bb.115: -; RV32I-NEXT: srai a1, a4, 31 -; RV32I-NEXT: mv t0, a1 -; RV32I-NEXT: mv s11, a1 -; RV32I-NEXT: mv ra, a1 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s10, .LBB20_128 +; RV32I-NEXT: .LBB20_115: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s1, 
.LBB20_129 ; RV32I-NEXT: .LBB20_116: -; RV32I-NEXT: srli a4, s0, 16 -; RV32I-NEXT: lui t1, 16 -; RV32I-NEXT: srli a7, s0, 24 -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli t5, a0, 24 -; RV32I-NEXT: srli a6, t4, 16 -; RV32I-NEXT: srli s2, t4, 24 -; RV32I-NEXT: srli t2, t3, 16 -; RV32I-NEXT: srli s3, t3, 24 -; RV32I-NEXT: srli s1, a1, 16 -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: srli t6, t0, 16 -; RV32I-NEXT: srli s6, t0, 24 -; RV32I-NEXT: srli s5, s11, 16 -; RV32I-NEXT: srli s4, s11, 24 -; RV32I-NEXT: srli s7, ra, 16 -; RV32I-NEXT: srli s8, ra, 24 -; RV32I-NEXT: addi t1, t1, -1 -; RV32I-NEXT: and s9, s0, t1 -; RV32I-NEXT: and s10, a0, t1 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s8, .LBB20_130 +; RV32I-NEXT: .LBB20_117: +; RV32I-NEXT: bnez t0, .LBB20_131 +; RV32I-NEXT: .LBB20_118: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_132 +; RV32I-NEXT: .LBB20_119: +; RV32I-NEXT: beqz t1, .LBB20_133 +; RV32I-NEXT: .LBB20_120: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: j .LBB20_134 +; RV32I-NEXT: .LBB20_121: +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_109 +; RV32I-NEXT: .LBB20_122: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s3, .LBB20_110 +; RV32I-NEXT: .LBB20_123: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_111 +; RV32I-NEXT: .LBB20_124: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s5, .LBB20_112 +; RV32I-NEXT: .LBB20_125: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_113 +; RV32I-NEXT: .LBB20_126: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s0, .LBB20_114 +; RV32I-NEXT: .LBB20_127: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s10, .LBB20_115 +; RV32I-NEXT: .LBB20_128: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s1, .LBB20_116 +; RV32I-NEXT: .LBB20_129: +; RV32I-NEXT: mv a5, a3 +; 
RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s8, .LBB20_117 +; RV32I-NEXT: .LBB20_130: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB20_118 +; RV32I-NEXT: .LBB20_131: +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_119 +; RV32I-NEXT: .LBB20_132: +; RV32I-NEXT: mv a3, s11 +; RV32I-NEXT: bnez t1, .LBB20_120 +; RV32I-NEXT: .LBB20_133: +; RV32I-NEXT: lw a5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: .LBB20_134: +; RV32I-NEXT: lw s11, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_148 +; RV32I-NEXT: # %bb.135: +; RV32I-NEXT: beq t1, s2, .LBB20_149 +; RV32I-NEXT: .LBB20_136: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_150 +; RV32I-NEXT: .LBB20_137: +; RV32I-NEXT: beq t1, s3, .LBB20_151 +; RV32I-NEXT: .LBB20_138: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_152 +; RV32I-NEXT: .LBB20_139: +; RV32I-NEXT: beq t1, s5, .LBB20_153 +; RV32I-NEXT: .LBB20_140: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB20_154 +; RV32I-NEXT: .LBB20_141: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB20_155 +; RV32I-NEXT: .LBB20_142: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB20_156 +; RV32I-NEXT: .LBB20_143: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s8, .LBB20_157 +; RV32I-NEXT: .LBB20_144: +; RV32I-NEXT: bnez t0, .LBB20_158 +; RV32I-NEXT: .LBB20_145: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_159 +; RV32I-NEXT: .LBB20_146: +; RV32I-NEXT: beqz t1, .LBB20_160 +; RV32I-NEXT: .LBB20_147: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_161 +; RV32I-NEXT: j .LBB20_162 +; RV32I-NEXT: .LBB20_148: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bne t1, s2, .LBB20_136 +; RV32I-NEXT: .LBB20_149: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_137 +; RV32I-NEXT: .LBB20_150: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bne t1, s3, .LBB20_138 +; 
RV32I-NEXT: .LBB20_151: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_139 +; RV32I-NEXT: .LBB20_152: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s5, .LBB20_140 +; RV32I-NEXT: .LBB20_153: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB20_141 +; RV32I-NEXT: .LBB20_154: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB20_142 +; RV32I-NEXT: .LBB20_155: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB20_143 +; RV32I-NEXT: .LBB20_156: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s8, .LBB20_144 +; RV32I-NEXT: .LBB20_157: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB20_145 +; RV32I-NEXT: .LBB20_158: +; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_146 +; RV32I-NEXT: .LBB20_159: +; RV32I-NEXT: lw a3, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: bnez t1, .LBB20_147 +; RV32I-NEXT: .LBB20_160: +; RV32I-NEXT: or a5, ra, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_162 +; RV32I-NEXT: .LBB20_161: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: .LBB20_162: +; RV32I-NEXT: beq t1, s2, .LBB20_174 +; RV32I-NEXT: # %bb.163: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_175 +; RV32I-NEXT: .LBB20_164: +; RV32I-NEXT: beq t1, s3, .LBB20_176 +; RV32I-NEXT: .LBB20_165: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s5, .LBB20_177 +; RV32I-NEXT: .LBB20_166: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s0, .LBB20_178 +; RV32I-NEXT: .LBB20_167: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s10, .LBB20_179 +; RV32I-NEXT: .LBB20_168: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s1, .LBB20_180 +; RV32I-NEXT: .LBB20_169: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s8, .LBB20_181 +; RV32I-NEXT: .LBB20_170: +; RV32I-NEXT: bnez t0, .LBB20_182 +; RV32I-NEXT: .LBB20_171: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_183 +; RV32I-NEXT: .LBB20_172: +; 
RV32I-NEXT: beqz t1, .LBB20_184 +; RV32I-NEXT: .LBB20_173: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_185 +; RV32I-NEXT: j .LBB20_186 +; RV32I-NEXT: .LBB20_174: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_164 +; RV32I-NEXT: .LBB20_175: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bne t1, s3, .LBB20_165 +; RV32I-NEXT: .LBB20_176: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s5, .LBB20_166 +; RV32I-NEXT: .LBB20_177: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s0, .LBB20_167 +; RV32I-NEXT: .LBB20_178: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s10, .LBB20_168 +; RV32I-NEXT: .LBB20_179: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s1, .LBB20_169 +; RV32I-NEXT: .LBB20_180: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s8, .LBB20_170 +; RV32I-NEXT: .LBB20_181: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beqz t0, .LBB20_171 +; RV32I-NEXT: .LBB20_182: +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_172 +; RV32I-NEXT: .LBB20_183: +; RV32I-NEXT: mv a3, s7 +; RV32I-NEXT: bnez t1, .LBB20_173 +; RV32I-NEXT: .LBB20_184: +; RV32I-NEXT: or a5, s9, a3 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_186 +; RV32I-NEXT: .LBB20_185: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: .LBB20_186: +; RV32I-NEXT: beq t1, s2, .LBB20_197 +; RV32I-NEXT: # %bb.187: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s3, .LBB20_198 +; RV32I-NEXT: .LBB20_188: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s5, .LBB20_199 +; RV32I-NEXT: .LBB20_189: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB20_200 +; RV32I-NEXT: .LBB20_190: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB20_201 +; RV32I-NEXT: .LBB20_191: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB20_202 +; RV32I-NEXT: .LBB20_192: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, 
s8, .LBB20_203 +; RV32I-NEXT: .LBB20_193: +; RV32I-NEXT: bnez t0, .LBB20_204 +; RV32I-NEXT: .LBB20_194: +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: bnez t4, .LBB20_205 +; RV32I-NEXT: .LBB20_195: +; RV32I-NEXT: beqz t1, .LBB20_206 +; RV32I-NEXT: .LBB20_196: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s2, .LBB20_207 +; RV32I-NEXT: j .LBB20_208 +; RV32I-NEXT: .LBB20_197: +; RV32I-NEXT: or a5, s6, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s3, .LBB20_188 +; RV32I-NEXT: .LBB20_198: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s5, .LBB20_189 +; RV32I-NEXT: .LBB20_199: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB20_190 +; RV32I-NEXT: .LBB20_200: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB20_191 +; RV32I-NEXT: .LBB20_201: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB20_192 +; RV32I-NEXT: .LBB20_202: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s8, .LBB20_193 +; RV32I-NEXT: .LBB20_203: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: beqz t0, .LBB20_194 +; RV32I-NEXT: .LBB20_204: +; RV32I-NEXT: mv t3, a5 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: beqz t4, .LBB20_195 +; RV32I-NEXT: .LBB20_205: +; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: bnez t1, .LBB20_196 +; RV32I-NEXT: .LBB20_206: +; RV32I-NEXT: or a3, s6, a3 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s2, .LBB20_208 +; RV32I-NEXT: .LBB20_207: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: .LBB20_208: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s3, .LBB20_217 +; RV32I-NEXT: # %bb.209: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s5, .LBB20_218 +; RV32I-NEXT: .LBB20_210: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s0, .LBB20_219 +; RV32I-NEXT: .LBB20_211: +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: bne t1, s10, .LBB20_220 +; RV32I-NEXT: .LBB20_212: +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bne t1, s1, .LBB20_221 +; RV32I-NEXT: .LBB20_213: +; 
RV32I-NEXT: bne t1, s8, .LBB20_222 +; RV32I-NEXT: .LBB20_214: +; RV32I-NEXT: beqz t0, .LBB20_216 +; RV32I-NEXT: .LBB20_215: +; RV32I-NEXT: mv a1, t2 +; RV32I-NEXT: .LBB20_216: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: srli a5, ra, 16 +; RV32I-NEXT: lui t4, 16 +; RV32I-NEXT: srli t2, ra, 24 +; RV32I-NEXT: srli t0, s11, 16 +; RV32I-NEXT: srli t6, s11, 24 +; RV32I-NEXT: srli t1, a4, 16 +; RV32I-NEXT: srli s2, a4, 24 +; RV32I-NEXT: srli t5, a6, 16 +; RV32I-NEXT: srli s3, a6, 24 +; RV32I-NEXT: srli s1, a7, 16 +; RV32I-NEXT: srli a3, a7, 24 +; RV32I-NEXT: srli s0, a0, 16 +; RV32I-NEXT: srli s5, a0, 24 +; RV32I-NEXT: srli s4, t3, 16 +; RV32I-NEXT: srli s6, t3, 24 +; RV32I-NEXT: srli s7, a1, 16 +; RV32I-NEXT: srli s8, a1, 24 +; RV32I-NEXT: addi t4, t4, -1 +; RV32I-NEXT: and s9, ra, t4 +; RV32I-NEXT: and s10, s11, t4 ; RV32I-NEXT: srli s9, s9, 8 -; RV32I-NEXT: sb s0, 0(a2) +; RV32I-NEXT: sb ra, 0(a2) ; RV32I-NEXT: sb s9, 1(a2) -; RV32I-NEXT: sb a4, 2(a2) -; RV32I-NEXT: sb a7, 3(a2) -; RV32I-NEXT: and a4, t4, t1 -; RV32I-NEXT: srli a7, s10, 8 -; RV32I-NEXT: sb a0, 4(a2) -; RV32I-NEXT: sb a7, 5(a2) -; RV32I-NEXT: sb a5, 6(a2) -; RV32I-NEXT: sb t5, 7(a2) -; RV32I-NEXT: and a0, t3, t1 -; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb t4, 8(a2) -; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb a6, 10(a2) +; RV32I-NEXT: sb a5, 2(a2) +; RV32I-NEXT: sb t2, 3(a2) +; RV32I-NEXT: and a5, a4, t4 +; RV32I-NEXT: srli t2, s10, 8 +; RV32I-NEXT: sb s11, 4(a2) +; RV32I-NEXT: sb t2, 5(a2) +; RV32I-NEXT: sb t0, 6(a2) +; RV32I-NEXT: sb t6, 7(a2) +; RV32I-NEXT: and t0, a6, t4 +; RV32I-NEXT: srli a5, a5, 8 +; RV32I-NEXT: sb a4, 8(a2) +; RV32I-NEXT: sb a5, 9(a2) +; RV32I-NEXT: sb t1, 10(a2) ; RV32I-NEXT: sb s2, 11(a2) -; RV32I-NEXT: and a4, a1, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: sb t3, 12(a2) -; RV32I-NEXT: sb a0, 13(a2) -; RV32I-NEXT: sb t2, 14(a2) +; RV32I-NEXT: and a4, a7, t4 +; RV32I-NEXT: srli a5, t0, 8 +; RV32I-NEXT: sb a6, 12(a2) +; RV32I-NEXT: sb a5, 13(a2) 
+; RV32I-NEXT: sb t5, 14(a2) ; RV32I-NEXT: sb s3, 15(a2) -; RV32I-NEXT: and a0, t0, t1 +; RV32I-NEXT: and a5, a0, t4 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a1, 16(a2) +; RV32I-NEXT: sb a7, 16(a2) ; RV32I-NEXT: sb a4, 17(a2) ; RV32I-NEXT: sb s1, 18(a2) ; RV32I-NEXT: sb a3, 19(a2) -; RV32I-NEXT: and a1, s11, t1 -; RV32I-NEXT: and a3, ra, t1 -; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: srli a1, a1, 8 +; RV32I-NEXT: and a3, t3, t4 +; RV32I-NEXT: and a4, a1, t4 +; RV32I-NEXT: srli a5, a5, 8 ; RV32I-NEXT: srli a3, a3, 8 -; RV32I-NEXT: sb t0, 20(a2) -; RV32I-NEXT: sb a0, 21(a2) -; RV32I-NEXT: sb t6, 22(a2) -; RV32I-NEXT: sb s6, 23(a2) -; RV32I-NEXT: sb s11, 24(a2) -; RV32I-NEXT: sb a1, 25(a2) -; RV32I-NEXT: sb s5, 26(a2) -; RV32I-NEXT: sb s4, 27(a2) -; RV32I-NEXT: sb ra, 28(a2) -; RV32I-NEXT: sb a3, 29(a2) +; RV32I-NEXT: srli a4, a4, 8 +; RV32I-NEXT: sb a0, 20(a2) +; RV32I-NEXT: sb a5, 21(a2) +; RV32I-NEXT: sb s0, 22(a2) +; RV32I-NEXT: sb s5, 23(a2) +; RV32I-NEXT: sb t3, 24(a2) +; RV32I-NEXT: sb a3, 25(a2) +; RV32I-NEXT: sb s4, 26(a2) +; RV32I-NEXT: sb s6, 27(a2) +; RV32I-NEXT: sb a1, 28(a2) +; RV32I-NEXT: sb a4, 29(a2) ; RV32I-NEXT: sb s7, 30(a2) ; RV32I-NEXT: sb s8, 31(a2) -; RV32I-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 96 +; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 72(sp) # 4-byte 
Folded Reload +; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 64(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 60(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 56(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 48(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s9, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s10, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s11, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 80 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB20_117: -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: mv a7, t5 -; RV32I-NEXT: bltu ra, t0, .LBB20_110 -; RV32I-NEXT: .LBB20_118: -; RV32I-NEXT: srai s1, a4, 31 -; RV32I-NEXT: mv s2, s1 -; RV32I-NEXT: bltu a5, a6, .LBB20_111 -; RV32I-NEXT: j .LBB20_112 +; RV32I-NEXT: .LBB20_217: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s5, .LBB20_210 +; RV32I-NEXT: .LBB20_218: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s0, .LBB20_211 +; RV32I-NEXT: .LBB20_219: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: beq t1, s10, .LBB20_212 +; RV32I-NEXT: .LBB20_220: +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: beq t1, s1, .LBB20_213 +; RV32I-NEXT: .LBB20_221: +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: beq t1, s8, .LBB20_214 +; RV32I-NEXT: .LBB20_222: +; RV32I-NEXT: mv t2, a3 +; RV32I-NEXT: bnez t0, .LBB20_215 +; RV32I-NEXT: j .LBB20_216 %src = load i256, ptr %src.ptr, align 1 %dwordOff = load i256, ptr %dwordOff.ptr, align 1 %bitOff = shl i256 %dwordOff, 6