diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 96ee69cf3f4ce..dae0e81ca4ccf 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -955,8 +955,23 @@ bool LowOverheadLoop::ValidateLiveOuts() { else if (!isPredicated && retainsOrReduces) { LLVM_DEBUG(dbgs() << " Unpredicated instruction that retainsOrReduces: " << MI); return false; - } else if (!isPredicated && MI.getOpcode() != ARM::MQPRCopy) + } else if (!isPredicated) FalseLanesUnknown.insert(&MI); + else if (int InactiveIdx = findVPTInactiveOperandIdx(MI); + InactiveIdx != -1) { + auto MO = MI.getOperand(InactiveIdx); + SmallPtrSet Defs; + RDI.getGlobalReachingDefs(&MI, MO.getReg(), Defs); + for (auto *Def : Defs) { + if (Def != &MI && FalseLanesUnknown.count(Def)) { + LLVM_DEBUG(dbgs() << " we think this is FalseLanesUnknown: " << MI); + LLVM_DEBUG(dbgs() + << " because it takes the false lanes of this: " << *Def); + FalseLanesUnknown.insert(&MI); + break; + } + } + } } LLVM_DEBUG({ @@ -1035,13 +1050,7 @@ bool LowOverheadLoop::ValidateLiveOuts() { SmallVector Worklist(LiveOutMIs.begin(), LiveOutMIs.end()); while (!Worklist.empty()) { MachineInstr *MI = Worklist.pop_back_val(); - if (MI->getOpcode() == ARM::MQPRCopy) { - VMOVCopies.insert(MI); - MachineInstr *CopySrc = - RDI.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg()); - if (CopySrc) - Worklist.push_back(CopySrc); - } else if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) { + if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) { LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI); VMOVCopies.clear(); return false; diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 431ce38ad6e99..cb7c6a466aad4 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -805,6 +805,19 @@ int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) { return -1; } +int llvm::findVPTInactiveOperandIdx(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) + if (MCID.operands()[i].OperandType == ARM::OPERAND_VPRED_R) { + assert(MCID.getOperandConstraint(i + 3, MCOI::TIED_TO) != -1 && + "Operand #3 of VPRED_R is the one tied to the output register"); + return i + 3; + } + + return -1; +} + ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg) { int PIdx = findFirstVPTPredOperandIdx(MI); diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 3ec3a6216b9f6..1b0bf2d499510 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -90,6 +90,9 @@ inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) { Register PredReg; return getVPTInstrPredicate(MI, PredReg); } +// Identify the input operand in an MVE predicated instruction which +// contributes the values of any inactive vector lanes. +int findVPTInactiveOperandIdx(const MachineInstr &MI); // Recomputes the Block Mask of Instr, a VPT or VPST instruction. // This rebuilds the block mask of the instruction depending on the predicates diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir index 0995521957b98..3c1fc40df09d0 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir @@ -309,34 +309,42 @@ body: | ; CHECK-NEXT: liveins: $r0, $r1, $r2, $r12 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $r3 = t2LDRHi12 $sp, 16, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $r6, dead $cpsr = nsw tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $r5, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg ; CHECK-NEXT: renamable $r1, dead $cpsr = nsw tLSLri killed renamable $r1, 1, 14 /* CC::al */, $noreg ; CHECK-NEXT: renamable $r3 = t2RSBri killed renamable $r3, 256, 14 /* CC::al */, $noreg, $noreg ; CHECK-NEXT: renamable $q0 = MVE_VDUP16 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 + ; CHECK-NEXT: renamable $r3 = t2BICri killed renamable $r6, 7, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $r6 = nuw nsw t2ADDrs killed renamable $r5, killed renamable $r3, 27, 14 /* CC::al */, $noreg, $noreg ; CHECK-NEXT: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $d0, $d1, $r0, $r1, $r2, $r3, $r6, $r12 + ; CHECK-NEXT: liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $r4 = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK-NEXT: $lr = MVE_DLSTP_16 renamable $r2 + ; CHECK-NEXT: $r5 = tMOVr $r2, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $lr = t2DLS renamable $r6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.3(0x7c000000), %bb.4(0x04000000) - ; CHECK-NEXT: liveins: $lr, $d0, $d1, $r0, $r1, $r2, $r3, $r4, $r6, $r12 + ; CHECK-NEXT: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r12 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 0, $noreg, renamable $lr - ; CHECK-NEXT: $d4 = VMOVD killed $d0, 14 /* CC::al */, $noreg - ; CHECK-NEXT: $d5 = VMOVD killed $d1, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $vpr = MVE_VCTP16 renamable $r5, 0, $noreg, $noreg + ; CHECK-NEXT: renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 8, 14 /* CC::al */, $noreg + ; CHECK-NEXT: MVE_VPST 8, implicit $vpr + ; CHECK-NEXT: renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 1, renamable $vpr, renamable $lr + ; CHECK-NEXT: $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 ; CHECK-NEXT: renamable $q1 = MVE_VAND killed renamable $q1, renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1 - ; CHECK-NEXT: $d0 = VMOVD killed $d4, 14 /* CC::al */, $noreg - ; CHECK-NEXT: $d1 = VMOVD killed $d5, 14 /* CC::al */, $noreg - ; CHECK-NEXT: renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 0, killed $noreg, renamable $lr - ; CHECK-NEXT: $lr = MVE_LETP killed renamable $lr, %bb.3 + ; CHECK-NEXT: $q0 = MVE_VORR killed $q2, killed $q2, 0, $noreg, $noreg, undef $q0 + ; CHECK-NEXT: MVE_VPST 8, implicit $vpr + ; CHECK-NEXT: renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 1, killed renamable $vpr, renamable $lr + ; CHECK-NEXT: $lr = t2LEUpdate killed renamable $lr, %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x04000000), %bb.2(0x7c000000) - ; CHECK-NEXT: liveins: $d0, $d1, $r0, $r1, $r2, $r3, $r6, $r12 + ; CHECK-NEXT: liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $r3, dead $cpsr = nuw nsw tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg ; CHECK-NEXT: renamable $r0 = tADDhirr killed renamable $r0, renamable $r1, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination-across-blocks.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination-across-blocks.mir index fb714f80f76f0..6991006cede8e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination-across-blocks.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination-across-blocks.mir @@ -447,20 +447,28 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $r0, $r1 ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r3, dead $cpsr = nuw nsw tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg ; CHECK-NEXT: renamable $q0 = MVE_VDUP32 renamable $r1, 0, $noreg, $noreg, undef renamable $q0 - ; CHECK-NEXT: $lr = MVE_DLSTP_32 killed renamable $r1 + ; CHECK-NEXT: renamable $r3 = t2ANDri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: renamable $lr = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (align 4): ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK-NEXT: liveins: $lr, $q0, $r0 + ; CHECK-NEXT: liveins: $lr, $q0, $r0, $r1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, $noreg - ; CHECK-NEXT: renamable $q1 = MVE_VORR killed renamable $q1, renamable $q0, 0, $noreg, $noreg, killed renamable $q1 - ; CHECK-NEXT: renamable $vpr = MVE_VCMPf32 renamable $q1, renamable $q0, 12, 0, killed $noreg, $noreg + ; CHECK-NEXT: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg, $noreg + ; CHECK-NEXT: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK-NEXT: MVE_VPST 8, implicit $vpr + ; CHECK-NEXT: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg + ; CHECK-NEXT: MVE_VPST 4, implicit $vpr + ; CHECK-NEXT: renamable $q1 = MVE_VORR killed renamable $q1, renamable $q0, 1, renamable $vpr, $noreg, killed renamable $q1 + ; CHECK-NEXT: renamable $vpr = MVE_VCMPf32 renamable $q1, renamable $q0, 12, 1, killed renamable $vpr, $noreg ; CHECK-NEXT: renamable $q0 = MVE_VORR renamable $q1, renamable $q1, 0, $noreg, $noreg, killed renamable $q0 ; CHECK-NEXT: MVE_VPST 8, implicit $vpr ; CHECK-NEXT: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q1, 1, killed renamable $vpr, $noreg, killed renamable $q0 - ; CHECK-NEXT: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK-NEXT: $lr = t2LEUpdate killed renamable $lr, %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir new file mode 100644 index 0000000000000..cf68d60602e72 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-vs-unpredicated-copy.mir @@ -0,0 +1,237 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s + +# Wrong output is +# $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 +# renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, renamable $lr :: (load unknown-size from %ir.13, align 4) +# $q0 = MVE_VORR $q1, $q1, 0, $noreg, $noreg, undef $q0 +# renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 0, killed $noreg, renamable $lr, killed renamable $q0 +# $lr = MVE_LETP killed renamable $lr, %bb.1 + +# in which the problem is that the second MVE_VORR, copying q1 into +# q0, is required to be unpredicated so that the inactive lanes will +# be copied. + +--- | + ; ModuleID = '162644.c' + source_filename = "162644.c" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-unknown-none-eabihf" + + @inactive = dso_local local_unnamed_addr global <4 x float> zeroinitializer, align 16 + + ; Function Attrs: nofree noinline norecurse nosync nounwind memory(read, inaccessiblemem: none) + define dso_local <4 x float> @test_func(ptr noundef readonly captures(none) %0, i32 noundef %1) local_unnamed_addr #0 { + %3 = load <4 x float>, ptr @inactive, align 16, !tbaa !3 + %4 = add i32 %1, 3 + %5 = call i32 @llvm.smin.i32(i32 %1, i32 4) + %6 = sub i32 %4, %5 + %7 = lshr i32 %6, 2 + %8 = add nuw nsw i32 %7, 1 + %9 = call i32 @llvm.start.loop.iterations.i32(i32 %8) + br label %10 + + 10: ; preds = %10, %2 + %11 = phi <4 x float> [ splat (float 0x3FB99999A0000000), %2 ], [ %17, %10 ] + %12 = phi i32 [ %1, %2 ], [ %19, %10 ] + %13 = phi ptr [ %0, %2 ], [ %18, %10 ] + %14 = phi i32 [ %9, %2 ], [ %20, %10 ] + %15 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %12) + %16 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %13, i32 4, <4 x i1> %15, <4 x float> zeroinitializer) + %17 = tail call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %11, <4 x float> %16, <4 x i1> %15, <4 x float> %3) + %18 = getelementptr inbounds nuw i8, ptr %13, i32 16 + %19 = add i32 %12, -4 + %20 = call i32 @llvm.loop.decrement.reg.i32(i32 %14, i32 1) + %21 = icmp ne i32 %20, 0 + br i1 %21, label %10, label %22, !llvm.loop !6 + + 22: ; preds = %10 + ret <4 x float> %17 + } + + ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 + + ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: read) + declare <4 x float> @llvm.masked.load.v4f32.p0(ptr captures(none), i32 immarg, <4 x i1>, <4 x float>) #2 + + ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) + declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1 + + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) + declare i32 @llvm.smin.i32(i32, i32) #3 + + ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn + declare i32 @llvm.start.loop.iterations.i32(i32) #4 + + ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4 + + attributes #0 = { nofree noinline norecurse nosync nounwind memory(read, inaccessiblemem: none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m52" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+pacbti,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" } + attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) } + attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: read) } + attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + attributes #4 = { nocallback noduplicate nofree nosync nounwind willreturn } + + !llvm.module.flags = !{!0, !1} + !llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 1, !"min_enum_size", i32 4} + !2 = !{!"clang version 22.0.0git"} + !3 = !{!4, !4, i64 0} + !4 = !{!"omnipotent char", !5, i64 0} + !5 = !{!"Simple C/C++ TBAA"} + !6 = distinct !{!6, !7, !8} + !7 = !{!"llvm.loop.mustprogress"} + !8 = !{!"llvm.loop.unroll.disable"} +... +--- +name: test_func +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: true +isSSA: false +noVRegs: true +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHContTarget: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: true + localFrameSize: 0 +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + isLRSpilled: true +body: | + ; CHECK-LABEL: name: test_func + ; CHECK: bb.0 (%ir-block.2): + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $lr, $r0, $r1, $r7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK-NEXT: $r2 = t2MOVi16 target-flags(arm-lo16) @inactive, 14 /* CC::al */, $noreg + ; CHECK-NEXT: tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK-NEXT: $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @inactive, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: renamable $q1 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg, $noreg :: (dereferenceable load (s128) from @inactive, !tbaa !3) + ; CHECK-NEXT: $r2 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK-NEXT: t2IT 10, 8, implicit-def $itstate + ; CHECK-NEXT: renamable $r2 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r2, implicit killed $itstate + ; CHECK-NEXT: renamable $r2, dead $cpsr = tSUBrr renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 3, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg + ; CHECK-NEXT: $r3 = t2MOVi16 52429, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $r3 = t2MOVTi16 killed $r3, 15820, 14 /* CC::al */, $noreg + ; CHECK-NEXT: renamable $q0 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 + ; CHECK-NEXT: $lr = t2DLS killed renamable $r2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (%ir-block.10, align 4): + ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK-NEXT: liveins: $lr, $q0, $q1, $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg, $noreg + ; CHECK-NEXT: $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, $noreg, undef $q2 + ; CHECK-NEXT: MVE_VPST 8, implicit $vpr + ; CHECK-NEXT: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, renamable $lr :: (load unknown-size from %ir.13, align 4) + ; CHECK-NEXT: $q0 = MVE_VORR $q1, $q1, 0, $noreg, $noreg, undef $q0 + ; CHECK-NEXT: MVE_VPST 8, implicit $vpr + ; CHECK-NEXT: renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 1, killed renamable $vpr, renamable $lr, killed renamable $q0 + ; CHECK-NEXT: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + ; CHECK-NEXT: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2 (%ir-block.22): + ; CHECK-NEXT: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $q0 + bb.0 (%ir-block.2): + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r7, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $r2 = t2MOVi16 target-flags(arm-lo16) @inactive, 14 /* CC::al */, $noreg + tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @inactive, 14 /* CC::al */, $noreg + renamable $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + renamable $q1 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg, $noreg :: (dereferenceable load (s128) from @inactive, !tbaa !3) + $r2 = tMOVr $r1, 14 /* CC::al */, $noreg + t2IT 10, 8, implicit-def $itstate + renamable $r2 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r2, implicit killed $itstate + renamable $r2, dead $cpsr = tSUBrr renamable $r1, killed renamable $r2, 14 /* CC::al */, $noreg + renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 3, 14 /* CC::al */, $noreg + renamable $r2 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg + $r3 = t2MOVi16 52429, 14 /* CC::al */, $noreg + $r3 = t2MOVTi16 killed $r3, 15820, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 + renamable $lr = t2DoLoopStartTP killed renamable $r2, renamable $r1 + + bb.1 (%ir-block.10, align 4): + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $q1, $r0, $r1 + + renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg, $noreg + $q2 = MQPRCopy killed $q0 + MVE_VPST 8, implicit $vpr + renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, renamable $lr :: (load unknown-size from %ir.13, align 4) + $q0 = MQPRCopy $q1 + MVE_VPST 8, implicit $vpr + renamable $q0 = MVE_VADDf32 killed renamable $q2, killed renamable $q3, 1, killed renamable $vpr, renamable $lr, killed renamable $q0 + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2 (%ir-block.22): + liveins: $q0 + + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit killed $q0 +...