diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index a5048b9c9e61d..91fb5d6760432 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -1123,24 +1123,83 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation -// where a consecutive multi-vector tuple is constructed from the same indices -// of multiple strided loads. This may still result in unnecessary copies -// between the loads and the tuple. Here we try to return a hint to assign the -// contiguous ZPRMulReg starting at the same register as the first operand of -// the pseudo, which should be a subregister of the first strided load. +// We add regalloc hints for different cases: +// * Choosing a better destination operand for predicated SVE instructions +// where the inactive lanes are undef, by choosing a register that is not +// unique to the other operands of the instruction. // -// For example, if the first strided load has been assigned $z16_z20_z24_z28 -// and the operands of the pseudo are each accessing subregister zsub2, we -// should look through through Order to find a contiguous register which -// begins with $z24 (i.e. $z24_z25_z26_z27). +// * Improve register allocation for SME multi-vector instructions where we can +// benefit from the strided- and contiguous register multi-vector tuples. // +// Here FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register +// allocation where a consecutive multi-vector tuple is constructed from the +// same indices of multiple strided loads. This may still result in +// unnecessary copies between the loads and the tuple. Here we try to return a +// hint to assign the contiguous ZPRMulReg starting at the same register as +// the first operand of the pseudo, which should be a subregister of the first +// strided load. 
+// +// For example, if the first strided load has been assigned $z16_z20_z24_z28 +// and the operands of the pseudo are each accessing subregister zsub2, we +// should look through Order to find a contiguous register which +// begins with $z24 (i.e. $z24_z25_z26_z27). bool AArch64RegisterInfo::getRegAllocationHints( Register VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { - auto &ST = MF.getSubtarget<AArch64Subtarget>(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // For predicated SVE instructions where the inactive lanes are undef, + // pick a destination register that is not unique to avoid introducing + // a movprfx. + const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg); + if (AArch64::ZPRRegClass.hasSubClassEq(RegRC)) { + for (const MachineOperand &DefOp : MRI.def_operands(VirtReg)) { + const MachineInstr &Def = *DefOp.getParent(); + if (DefOp.isImplicit() || + (TII->get(Def.getOpcode()).TSFlags & AArch64::FalseLanesMask) != + AArch64::FalseLanesUndef) + continue; + + for (MCPhysReg R : Order) { + auto AddHintIfSuitable = [&](MCPhysReg R, const MachineOperand &MO) { + // R is a suitable register hint if there exists an operand for the + // instruction that is not yet allocated a register or if R matches + // one of the other source operands. 
+ if (!VRM->hasPhys(MO.getReg()) || VRM->getPhys(MO.getReg()) == R) + Hints.push_back(R); + }; + + unsigned Opcode = AArch64::getSVEPseudoMap(Def.getOpcode()); + switch (TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask) { + default: + break; + case AArch64::DestructiveTernaryCommWithRev: + AddHintIfSuitable(R, Def.getOperand(2)); + AddHintIfSuitable(R, Def.getOperand(3)); + AddHintIfSuitable(R, Def.getOperand(4)); + break; + case AArch64::DestructiveBinaryComm: + case AArch64::DestructiveBinaryCommWithRev: + AddHintIfSuitable(R, Def.getOperand(2)); + AddHintIfSuitable(R, Def.getOperand(3)); + break; + case AArch64::DestructiveBinary: + case AArch64::DestructiveBinaryImm: + AddHintIfSuitable(R, Def.getOperand(2)); + break; + } + } + } + + if (Hints.size()) + return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, + MF, VRM); + } + if (!ST.hasSME() || !ST.isStreaming()) return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); @@ -1153,8 +1212,7 @@ bool AArch64RegisterInfo::getRegAllocationHints( // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy // instructions over reducing the number of clobbered callee-save registers, // so we add the strided registers as a hint. 
- const MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned RegID = MRI.getRegClass(VirtReg)->getID(); + unsigned RegID = RegRC->getID(); if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID || RegID == AArch64::ZPR4StridedOrContiguousRegClassID) { diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll b/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll index e086ab92421fb..33ea74912251e 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll @@ -52,12 +52,11 @@ define <2 x i64> @test_mul_sub_2x64_2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: sub v0.2d, v1.2d, v0.2d +; CHECK-NEXT: mul z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d ; CHECK-NEXT: ret %div = sdiv <2 x i64> %a, %b %mul = mul <2 x i64> %c, %d diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll index 533e831de0df8..258eaabee9376 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll @@ -14,13 +14,12 @@ define @mull_add( %a, @mul_add_rot_mull( %a, , } @llvm.vector.deinterleave2.nxv4f64( %a) diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll index 1eed9722f57be..b68c0094f84de 100644 --- 
a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll @@ -200,12 +200,10 @@ define @mul_add_rot_mull( %a, @complex_add_v4f16( %a, , } @llvm.vector.deinterleave2.nxv4f16( %a) diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll index 061fd07489284..00b0095e4309c 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll @@ -18,11 +18,10 @@ define @complex_mul_v4i16( %a, @fshl_rot_illegal_i64( %a, @llvm.fshl.nxv4i64( %a, %a, %b) ret %fshl diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll index 6fbae7edfec0a..2dda03e5c6dab 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll @@ -55,10 +55,9 @@ define void @fadd_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: fadd z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fadd z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: fadd z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fadd_v32f16: @@ -154,10 +153,9 @@ define void @fadd_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: fadd z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fadd z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: fadd z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; 
VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fadd_v16f32: @@ -253,10 +251,9 @@ define void @fadd_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: fadd z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fadd z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: fadd z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fadd_v8f64: @@ -660,10 +657,9 @@ define void @fma_v32f16(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1h { z5.h }, p0/z, [x2] ; VBITS_GE_256-NEXT: fmad z0.h, p0/m, z1.h, z2.h -; VBITS_GE_256-NEXT: movprfx z1, z5 -; VBITS_GE_256-NEXT: fmla z1.h, p0/m, z3.h, z4.h +; VBITS_GE_256-NEXT: fmad z3.h, p0/m, z4.h, z5.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fma_v32f16: @@ -771,10 +767,9 @@ define void @fma_v16f32(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x2] ; VBITS_GE_256-NEXT: fmad z0.s, p0/m, z1.s, z2.s -; VBITS_GE_256-NEXT: movprfx z1, z5 -; VBITS_GE_256-NEXT: fmla z1.s, p0/m, z3.s, z4.s +; VBITS_GE_256-NEXT: fmad z3.s, p0/m, z4.s, z5.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fma_v16f32: @@ -881,10 +876,9 @@ define void @fma_v8f64(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, 
[x2] ; VBITS_GE_256-NEXT: fmad z0.d, p0/m, z1.d, z2.d -; VBITS_GE_256-NEXT: movprfx z1, z5 -; VBITS_GE_256-NEXT: fmla z1.d, p0/m, z3.d, z4.d +; VBITS_GE_256-NEXT: fmad z3.d, p0/m, z4.d, z5.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fma_v8f64: @@ -990,10 +984,9 @@ define void @fmul_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: fmul z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fmul z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: fmul z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmul_v32f16: @@ -1089,10 +1082,9 @@ define void @fmul_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: fmul z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fmul z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: fmul z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmul_v16f32: @@ -1188,10 +1180,9 @@ define void @fmul_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: fmul z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fmul z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: fmul z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { 
z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmul_v8f64: @@ -1827,10 +1818,9 @@ define void @fsub_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: fsub z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fsub z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: fsub z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fsub_v32f16: @@ -1926,10 +1916,9 @@ define void @fsub_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: fsub z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fsub z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: fsub z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fsub_v16f32: @@ -2025,10 +2014,9 @@ define void @fsub_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: fsub z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: fsub z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: fsub z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fsub_v8f64: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll index e1ec5ee5f6137..633b429db3dfd 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll @@ -64,10 +64,9 @@ define void @fma_v32f16(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1h { z5.h }, p0/z, [x2] ; VBITS_GE_256-NEXT: fmad z0.h, p0/m, z1.h, z2.h -; VBITS_GE_256-NEXT: movprfx z1, z5 -; VBITS_GE_256-NEXT: fmla z1.h, p0/m, z3.h, z4.h +; VBITS_GE_256-NEXT: fmad z3.h, p0/m, z4.h, z5.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fma_v32f16: @@ -181,10 +180,9 @@ define void @fma_v16f32(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x2] ; VBITS_GE_256-NEXT: fmad z0.s, p0/m, z1.s, z2.s -; VBITS_GE_256-NEXT: movprfx z1, z5 -; VBITS_GE_256-NEXT: fmla z1.s, p0/m, z3.s, z4.s +; VBITS_GE_256-NEXT: fmad z3.s, p0/m, z4.s, z5.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fma_v16f32: @@ -297,10 +295,9 @@ define void @fma_v8f64(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1d { z5.d }, p0/z, [x2] ; VBITS_GE_256-NEXT: fmad z0.d, p0/m, z1.d, z2.d -; VBITS_GE_256-NEXT: movprfx z1, z5 -; VBITS_GE_256-NEXT: fmla z1.d, p0/m, z3.d, z4.d +; VBITS_GE_256-NEXT: fmad z3.d, p0/m, z4.d, z5.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fma_v8f64: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll index de60deeafaf32..90a04995ff15e 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll @@ -55,10 +55,9 @@ define void @fmaxnm_v32f16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h +; VBITS_EQ_256-NEXT: fmaxnm z2.h, p0/m, z2.h, z3.h ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_EQ_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmaxnm_v32f16: @@ -154,10 +153,9 @@ define void @fmaxnm_v16f32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s +; VBITS_EQ_256-NEXT: fmaxnm z2.s, p0/m, z2.s, z3.s ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_EQ_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmaxnm_v16f32: @@ -253,10 +251,9 @@ define void @fmaxnm_v8f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d +; VBITS_EQ_256-NEXT: fmaxnm z2.d, p0/m, z2.d, z3.d ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_EQ_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmaxnm_v8f64: @@ -356,10 +353,9 @@ define void @fminnm_v32f16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fminnm z0.h, p0/m, z0.h, z1.h -; VBITS_EQ_256-NEXT: 
movprfx z1, z2 -; VBITS_EQ_256-NEXT: fminnm z1.h, p0/m, z1.h, z3.h +; VBITS_EQ_256-NEXT: fminnm z2.h, p0/m, z2.h, z3.h ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_EQ_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fminnm_v32f16: @@ -455,10 +451,9 @@ define void @fminnm_v16f32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fminnm z0.s, p0/m, z0.s, z1.s -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fminnm z1.s, p0/m, z1.s, z3.s +; VBITS_EQ_256-NEXT: fminnm z2.s, p0/m, z2.s, z3.s ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_EQ_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fminnm_v16f32: @@ -554,10 +549,9 @@ define void @fminnm_v8f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fminnm z0.d, p0/m, z0.d, z1.d -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fminnm z1.d, p0/m, z1.d, z3.d +; VBITS_EQ_256-NEXT: fminnm z2.d, p0/m, z2.d, z3.d ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_EQ_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fminnm_v8f64: @@ -657,10 +651,9 @@ define void @fmax_v32f16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmax z0.h, p0/m, z0.h, z1.h -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmax z1.h, p0/m, z1.h, z3.h +; VBITS_EQ_256-NEXT: fmax z2.h, p0/m, z2.h, z3.h ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_EQ_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; 
VBITS_GE_512-LABEL: fmax_v32f16: @@ -756,10 +749,9 @@ define void @fmax_v16f32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmax z0.s, p0/m, z0.s, z1.s -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmax z1.s, p0/m, z1.s, z3.s +; VBITS_EQ_256-NEXT: fmax z2.s, p0/m, z2.s, z3.s ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_EQ_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmax_v16f32: @@ -855,10 +847,9 @@ define void @fmax_v8f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmax z0.d, p0/m, z0.d, z1.d -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmax z1.d, p0/m, z1.d, z3.d +; VBITS_EQ_256-NEXT: fmax z2.d, p0/m, z2.d, z3.d ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_EQ_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmax_v8f64: @@ -958,10 +949,9 @@ define void @fmin_v32f16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmin z0.h, p0/m, z0.h, z1.h -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmin z1.h, p0/m, z1.h, z3.h +; VBITS_EQ_256-NEXT: fmin z2.h, p0/m, z2.h, z3.h ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_EQ_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmin_v32f16: @@ -1057,10 +1047,9 @@ define void @fmin_v16f32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmin z0.s, p0/m, z0.s, z1.s -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmin z1.s, p0/m, z1.s, 
z3.s +; VBITS_EQ_256-NEXT: fmin z2.s, p0/m, z2.s, z3.s ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_EQ_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmin_v16f32: @@ -1156,10 +1145,9 @@ define void @fmin_v8f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_EQ_256-NEXT: fmin z0.d, p0/m, z0.d, z1.d -; VBITS_EQ_256-NEXT: movprfx z1, z2 -; VBITS_EQ_256-NEXT: fmin z1.d, p0/m, z1.d, z3.d +; VBITS_EQ_256-NEXT: fmin z2.d, p0/m, z2.d, z3.d ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_EQ_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_GE_512-LABEL: fmin_v8f64: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-abd.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-abd.ll index 08a974fa2d9f4..a91b392b7230a 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-abd.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-abd.ll @@ -155,10 +155,9 @@ define void @sabd_v64i8_v64i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: sabd z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: sabd z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: sabd z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: sabd_v64i8_v64i64: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll index 58fca3a2cf8b6..736239599836c 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll @@ -456,10 +456,9 @@ define void 
@mul_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: mul z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: mul z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: mul z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: mul_v64i8: @@ -555,10 +554,9 @@ define void @mul_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: mul z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: mul z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: mul z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: mul_v32i16: @@ -654,10 +652,9 @@ define void @mul_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: mul z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: mul z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: mul z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: mul_v16i32: @@ -759,10 +756,9 @@ define void @mul_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: mul z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: mul z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: mul z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, 
[x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: mul_v8i64: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll index 4926684ddc2de..c56376887d966 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll @@ -55,10 +55,9 @@ define void @smax_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: smax z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smax z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: smax z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smax_v64i8: @@ -154,10 +153,9 @@ define void @smax_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: smax z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smax z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: smax z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smax_v32i16: @@ -253,10 +251,9 @@ define void @smax_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: smax z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smax z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: smax z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: 
st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smax_v16i32: @@ -360,10 +357,9 @@ define void @smax_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: smax z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smax z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: smax z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smax_v8i64: @@ -463,10 +459,9 @@ define void @smin_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: smin z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smin z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: smin z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smin_v64i8: @@ -562,10 +557,9 @@ define void @smin_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: smin z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smin z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: smin z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smin_v32i16: @@ -661,10 +655,9 @@ define void @smin_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: smin z0.s, p0/m, z0.s, 
z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smin z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: smin z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smin_v16i32: @@ -768,10 +761,9 @@ define void @smin_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: smin z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smin z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: smin z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smin_v8i64: @@ -871,10 +863,9 @@ define void @umax_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: umax z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umax z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: umax z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umax_v64i8: @@ -970,10 +961,9 @@ define void @umax_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: umax z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umax z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: umax z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; 
VBITS_GE_512-LABEL: umax_v32i16: @@ -1069,10 +1059,9 @@ define void @umax_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: umax z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umax z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: umax z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umax_v16i32: @@ -1176,10 +1165,9 @@ define void @umax_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: umax z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umax z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: umax z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umax_v8i64: @@ -1279,10 +1267,9 @@ define void @umin_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: umin z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umin z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: umin z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umin_v64i8: @@ -1378,10 +1365,9 @@ define void @umin_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: umin z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umin z1.h, p0/m, z1.h, z3.h 
+; VBITS_GE_256-NEXT: umin z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umin_v32i16: @@ -1477,10 +1463,9 @@ define void @umin_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: umin z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umin z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: umin z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umin_v16i32: @@ -1584,10 +1569,9 @@ define void @umin_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: umin z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umin z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: umin z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umin_v8i64: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll index 41cce354cc9de..dfbc23707e418 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll @@ -78,10 +78,9 @@ define void @smulh_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: smulh z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smulh z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: smulh z2.b, 
p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smulh_v64i8: @@ -209,10 +208,9 @@ define void @smulh_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: smulh z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smulh z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: smulh z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smulh_v32i16: @@ -340,10 +338,9 @@ define void @smulh_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: smulh z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smulh z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: smulh z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smulh_v16i32: @@ -471,10 +468,9 @@ define void @smulh_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: smulh z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: smulh z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: smulh z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smulh_v8i64: @@ -607,10 +603,9 @@ define void @umulh_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b 
{ z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: umulh z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umulh z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: umulh z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umulh_v64i8: @@ -739,10 +734,9 @@ define void @umulh_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: umulh z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umulh z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: umulh z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umulh_v32i16: @@ -870,10 +864,9 @@ define void @umulh_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: umulh z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umulh z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: umulh z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umulh_v16i32: @@ -1001,10 +994,9 @@ define void @umulh_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: umulh z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: umulh z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: umulh z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; 
VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umulh_v8i64: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll index 27be84419d59e..14204e965fb4d 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll @@ -616,10 +616,9 @@ define void @srem_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: movprfx z5, z3 ; VBITS_GE_256-NEXT: sdiv z5.s, p0/m, z5.s, z4.s ; VBITS_GE_256-NEXT: mls z0.s, p0/m, z2.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z3 -; VBITS_GE_256-NEXT: mls z1.s, p0/m, z5.s, z4.s +; VBITS_GE_256-NEXT: mls z3.s, p0/m, z5.s, z4.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: srem_v16i32: @@ -744,11 +743,10 @@ define void @srem_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_128-NEXT: movprfx z18, z16 ; VBITS_GE_128-NEXT: sdiv z18.d, p0/m, z18.d, z17.d ; VBITS_GE_128-NEXT: msb z0.d, p0/m, z4.d, z1.d -; VBITS_GE_128-NEXT: movprfx z1, z2 -; VBITS_GE_128-NEXT: mls z1.d, p0/m, z19.d, z3.d +; VBITS_GE_128-NEXT: mls z2.d, p0/m, z19.d, z3.d ; VBITS_GE_128-NEXT: mls z16.d, p0/m, z18.d, z17.d ; VBITS_GE_128-NEXT: mls z5.d, p0/m, z7.d, z6.d -; VBITS_GE_128-NEXT: stp q0, q1, [x0] +; VBITS_GE_128-NEXT: stp q0, q2, [x0] ; VBITS_GE_128-NEXT: stp q16, q5, [x0, #32] ; VBITS_GE_128-NEXT: ret ; @@ -765,10 +763,9 @@ define void @srem_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: movprfx z5, z3 ; VBITS_GE_256-NEXT: sdiv z5.d, p0/m, z5.d, z4.d ; VBITS_GE_256-NEXT: mls z0.d, p0/m, z2.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z3 -; VBITS_GE_256-NEXT: mls z1.d, p0/m, z5.d, z4.d +; VBITS_GE_256-NEXT: mls z3.d, p0/m, z5.d, z4.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, 
p0, [x0] +; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: srem_v8i64: @@ -1434,10 +1431,9 @@ define void @urem_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: movprfx z5, z3 ; VBITS_GE_256-NEXT: udiv z5.s, p0/m, z5.s, z4.s ; VBITS_GE_256-NEXT: mls z0.s, p0/m, z2.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z3 -; VBITS_GE_256-NEXT: mls z1.s, p0/m, z5.s, z4.s +; VBITS_GE_256-NEXT: mls z3.s, p0/m, z5.s, z4.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: urem_v16i32: @@ -1562,11 +1558,10 @@ define void @urem_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_128-NEXT: movprfx z18, z16 ; VBITS_GE_128-NEXT: udiv z18.d, p0/m, z18.d, z17.d ; VBITS_GE_128-NEXT: msb z0.d, p0/m, z4.d, z1.d -; VBITS_GE_128-NEXT: movprfx z1, z2 -; VBITS_GE_128-NEXT: mls z1.d, p0/m, z19.d, z3.d +; VBITS_GE_128-NEXT: mls z2.d, p0/m, z19.d, z3.d ; VBITS_GE_128-NEXT: mls z16.d, p0/m, z18.d, z17.d ; VBITS_GE_128-NEXT: mls z5.d, p0/m, z7.d, z6.d -; VBITS_GE_128-NEXT: stp q0, q1, [x0] +; VBITS_GE_128-NEXT: stp q0, q2, [x0] ; VBITS_GE_128-NEXT: stp q16, q5, [x0, #32] ; VBITS_GE_128-NEXT: ret ; @@ -1583,10 +1578,9 @@ define void @urem_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: movprfx z5, z3 ; VBITS_GE_256-NEXT: udiv z5.d, p0/m, z5.d, z4.d ; VBITS_GE_256-NEXT: mls z0.d, p0/m, z2.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z3 -; VBITS_GE_256-NEXT: mls z1.d, p0/m, z5.d, z4.d +; VBITS_GE_256-NEXT: mls z3.d, p0/m, z5.d, z4.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: urem_v8i64: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll index 0fa8c8f50e29c..a8afa90df96e4 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll @@ -57,10 +57,9 @@ define void @ashr_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: asr z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: asr z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: asr z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: ashr_v64i8: @@ -158,10 +157,9 @@ define void @ashr_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: asr z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: asr z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: asr z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: ashr_v32i16: @@ -259,10 +257,9 @@ define void @ashr_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: asr z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: asr z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: asr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: ashr_v16i32: @@ -360,10 +357,9 @@ define void @ashr_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: asr z0.d, p0/m, z0.d, z1.d -; 
VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: asr z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: asr z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: ashr_v8i64: @@ -465,10 +461,9 @@ define void @lshr_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: lsr z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: lsr z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: lsr z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: lshr_v64i8: @@ -566,10 +561,9 @@ define void @lshr_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: lsr z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: lsr z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: lsr z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: lshr_v32i16: @@ -667,10 +661,9 @@ define void @lshr_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: lsr z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: lsr z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: lsr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: lshr_v16i32: @@ 
-768,10 +761,9 @@ define void @lshr_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: lsr z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: lsr z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: lsr z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: lshr_v8i64: @@ -871,10 +863,9 @@ define void @shl_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: lsl z0.b, p0/m, z0.b, z1.b -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: lsl z1.b, p0/m, z1.b, z3.b +; VBITS_GE_256-NEXT: lsl z2.b, p0/m, z2.b, z3.b ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shl_v64i8: @@ -970,10 +961,9 @@ define void @shl_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: lsl z0.h, p0/m, z0.h, z1.h -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: lsl z1.h, p0/m, z1.h, z3.h +; VBITS_GE_256-NEXT: lsl z2.h, p0/m, z2.h, z3.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shl_v32i16: @@ -1069,10 +1059,9 @@ define void @shl_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: lsl z0.s, p0/m, z0.s, z1.s -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: lsl z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: lsl z2.s, p0/m, z2.s, z3.s ; 
VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shl_v16i32: @@ -1168,10 +1157,9 @@ define void @shl_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: lsl z0.d, p0/m, z0.d, z1.d -; VBITS_GE_256-NEXT: movprfx z1, z2 -; VBITS_GE_256-NEXT: lsl z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: lsl z2.d, p0/m, z2.d, z3.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shl_v8i64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll index f2c882c370eab..20c06f0a1aff5 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll @@ -193,9 +193,8 @@ define void @fadd_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fadd z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fadd_v16f16: @@ -397,9 +396,8 @@ define void @fadd_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fadd z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fadd_v8f32: @@ -479,9 +477,8 @@ define void @fadd_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, 
vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fadd z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fadd_v4f64: @@ -703,9 +700,8 @@ define void @fdiv_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fdivr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fdiv z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fdiv z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fdiv_v16f16: @@ -907,9 +903,8 @@ define void @fdiv_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fdiv z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fdiv z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fdiv_v8f32: @@ -989,9 +984,8 @@ define void @fdiv_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fdiv z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fdiv z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fdiv_v4f64: @@ -1253,9 +1247,8 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldp q1, q5, [x2] ; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fmad z0.h, p0/m, z2.h, z1.h -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmad z3.h, p0/m, z4.h, z5.h +; CHECK-NEXT: stp q0, q3, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fma_v16f16: @@ -1501,9 +1494,8 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) { ; 
CHECK-NEXT: ldp q1, q5, [x2] ; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fmad z0.s, p0/m, z2.s, z1.s -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmad z3.s, p0/m, z4.s, z5.s +; CHECK-NEXT: stp q0, q3, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fma_v8f32: @@ -1595,9 +1587,8 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldp q1, q5, [x2] ; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fmad z0.d, p0/m, z2.d, z1.d -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmad z3.d, p0/m, z4.d, z5.d +; CHECK-NEXT: stp q0, q3, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fma_v4f64: @@ -1824,9 +1815,8 @@ define void @fmul_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmul z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmul_v16f16: @@ -2028,9 +2018,8 @@ define void @fmul_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmul z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmul_v8f32: @@ -2110,9 +2099,8 @@ define void @fmul_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmul z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmul z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmul_v4f64: @@ -3152,9 +3140,8 @@ define void @fsub_v16f16(ptr 
%a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fsub z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fsub z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fsub_v16f16: @@ -3356,9 +3343,8 @@ define void @fsub_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fsub z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fsub z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fsub_v8f32: @@ -3438,9 +3424,8 @@ define void @fsub_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fsub z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fsub z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fsub_v4f64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll index 680cb4fb0a791..dbacd77315198 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll @@ -208,9 +208,8 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldp q1, q5, [x2] ; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fmad z0.h, p0/m, z2.h, z1.h -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmad z3.h, p0/m, z4.h, z5.h +; CHECK-NEXT: stp q0, q3, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fma_v16f16: @@ -526,9 +525,8 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldp q1, 
q5, [x2] ; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fmad z0.s, p0/m, z2.s, z1.s -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmad z3.s, p0/m, z4.s, z5.s +; CHECK-NEXT: stp q0, q3, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fma_v8f32: @@ -642,9 +640,8 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldp q1, q5, [x2] ; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fmad z0.d, p0/m, z2.d, z1.d -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmad z3.d, p0/m, z4.d, z5.d +; CHECK-NEXT: stp q0, q3, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fma_v4f64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll index 84aea185917fa..e53d6a9081154 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll @@ -143,9 +143,8 @@ define void @fmaxnm_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmaxnm z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmaxnm_v16f16: @@ -347,9 +346,8 @@ define void @fmaxnm_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmaxnm z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmaxnm_v8f32: @@ -448,9 +446,8 @@ define void @fmaxnm_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; 
CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmaxnm z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmaxnm_v4f64: @@ -622,9 +619,8 @@ define void @fminnm_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fminnm z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fminnm_v16f16: @@ -826,9 +822,8 @@ define void @fminnm_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fminnm z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fminnm_v8f32: @@ -927,9 +922,8 @@ define void @fminnm_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fminnm z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fminnm_v4f64: @@ -1101,9 +1095,8 @@ define void @fmax_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmax z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmax z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmax_v16f16: @@ -1305,9 +1298,8 @@ define void @fmax_v8f32(ptr %a, 
ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmax z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmax z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmax_v8f32: @@ -1406,9 +1398,8 @@ define void @fmax_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmax z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmax z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmax_v4f64: @@ -1580,9 +1571,8 @@ define void @fmin_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmin z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmin z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmin_v16f16: @@ -1784,9 +1774,8 @@ define void @fmin_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmin z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmin z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmin_v8f32: @@ -1885,9 +1874,8 @@ define void @fmin_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fmin z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fmin z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fmin_v4f64: diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll index 4360f3a12014a..02b5469c0ff85 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll @@ -975,9 +975,8 @@ define void @mul_v32i8(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.b, vl16 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: mul z0.b, p0/m, z0.b, z1.b -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: mul z1.b, p0/m, z1.b, z3.b -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: mul z2.b, p0/m, z2.b, z3.b +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: mul_v32i8: @@ -1286,9 +1285,8 @@ define void @mul_v16i16(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.h, vl8 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: mul z1.h, p0/m, z1.h, z3.h -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: mul z2.h, p0/m, z2.h, z3.h +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: mul_v16i16: @@ -1467,9 +1465,8 @@ define void @mul_v8i32(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.s, vl4 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: mul z1.s, p0/m, z1.s, z3.s -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: mul z2.s, p0/m, z2.s, z3.s +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: mul_v8i32: @@ -1599,9 +1596,8 @@ define void @mul_v4i64(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.d, vl2 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: mul z1.d, p0/m, z1.d, z3.d -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: mul z2.d, p0/m, z2.d, z3.d +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: mul_v4i64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll index 
1fdcd4f826870..8e1d61b51e2bb 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll @@ -779,9 +779,8 @@ define void @sdiv_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: sdiv_v8i32: @@ -886,9 +885,8 @@ define void @sdiv_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: sdivr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: sdiv z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: sdiv_v4i64: @@ -1693,9 +1691,8 @@ define void @udiv_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: udiv_v8i32: @@ -1800,9 +1797,8 @@ define void @udiv_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: udivr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: udiv z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: udiv_v4i64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll index 1bca7dd09d9b7..d858d8171926e 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll @@ -179,9 +179,8 @@ define void @smax_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: smax z1.b, p0/m, z1.b, z3.b -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: smax z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: smax_v32i8: @@ -473,9 +472,8 @@ define void @smax_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: smax z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: smax z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: smax_v16i16: @@ -651,9 +649,8 @@ define void @smax_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: smax z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: smax z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: smax_v8i32: @@ -771,9 +768,8 @@ define void @smax_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: smax z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: smax_v4i64: @@ -985,9 +981,8 @@ define void @smin_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: smin z1.b, p0/m, z1.b, z3.b -; 
CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: smin z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: smin_v32i8: @@ -1279,9 +1274,8 @@ define void @smin_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: smin z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: smin z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: smin_v16i16: @@ -1457,9 +1451,8 @@ define void @smin_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: smin z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: smin z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: smin_v8i32: @@ -1577,9 +1570,8 @@ define void @smin_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: smin z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: smin z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: smin_v4i64: @@ -1791,9 +1783,8 @@ define void @umax_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: umax z1.b, p0/m, z1.b, z3.b -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: umax z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: umax_v32i8: @@ -2085,9 +2076,8 @@ define void @umax_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: umax z1.h, p0/m, 
z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: umax z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: umax_v16i16: @@ -2263,9 +2253,8 @@ define void @umax_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: umax z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: umax z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: umax_v8i32: @@ -2383,9 +2372,8 @@ define void @umax_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: umax z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: umax z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: umax_v4i64: @@ -2597,9 +2585,8 @@ define void @umin_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: umin z1.b, p0/m, z1.b, z3.b -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: umin z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: umin_v32i8: @@ -2891,9 +2878,8 @@ define void @umin_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: umin z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: umin z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: umin_v16i16: @@ -3069,9 +3055,8 @@ define void @umin_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: umin 
z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: umin z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: umin_v8i32: @@ -3189,9 +3174,8 @@ define void @umin_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: umin z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: umin z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: umin_v4i64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll index 0c97eedd4362d..85b7b4d010062 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll @@ -294,9 +294,8 @@ define void @smulh_v32i8(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.b, vl16 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: smulh z1.b, p0/m, z1.b, z3.b -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: smulh z2.b, p0/m, z2.b, z3.b +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: smulh_v32i8: @@ -755,9 +754,8 @@ define void @smulh_v16i16(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.h, vl8 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: smulh z1.h, p0/m, z1.h, z3.h -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: smulh z2.h, p0/m, z2.h, z3.h +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: smulh_v16i16: @@ -1001,9 +999,8 @@ define void @smulh_v8i32(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.s, vl4 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: smulh z1.s, p0/m, z1.s, z3.s -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: smulh z2.s, p0/m, z2.s, z3.s +; 
SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: smulh_v8i32: @@ -1159,9 +1156,8 @@ define void @smulh_v4i64(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.d, vl2 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: smulh z1.d, p0/m, z1.d, z3.d -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: smulh z2.d, p0/m, z2.d, z3.d +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: smulh_v4i64: @@ -1494,9 +1490,8 @@ define void @umulh_v32i8(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.b, vl16 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: umulh z1.b, p0/m, z1.b, z3.b -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: umulh z2.b, p0/m, z2.b, z3.b +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: umulh_v32i8: @@ -1954,9 +1949,8 @@ define void @umulh_v16i16(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.h, vl8 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: umulh z1.h, p0/m, z1.h, z3.h -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: umulh z2.h, p0/m, z2.h, z3.h +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: umulh_v16i16: @@ -2200,9 +2194,8 @@ define void @umulh_v8i32(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.s, vl4 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: umulh z1.s, p0/m, z1.s, z3.s -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: umulh z2.s, p0/m, z2.s, z3.s +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: umulh_v8i32: @@ -2358,9 +2351,8 @@ define void @umulh_v4i64(ptr %a, ptr %b) { ; SVE-NEXT: ptrue p0.d, vl2 ; SVE-NEXT: ldp q1, q2, [x0] ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d -; SVE-NEXT: movprfx z1, z2 -; SVE-NEXT: umulh z1.d, p0/m, z1.d, z3.d -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: umulh z2.d, p0/m, z2.d, z3.d +; SVE-NEXT: stp q0, q2, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: umulh_v4i64: diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll index 372f6a06bf64b..c4b6c0e6e924c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll @@ -883,9 +883,8 @@ define void @srem_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: movprfx z5, z2 ; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z3.s ; CHECK-NEXT: msb z0.s, p0/m, z4.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: mls z2.s, p0/m, z5.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: srem_v8i32: @@ -1013,9 +1012,8 @@ define void @srem_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: movprfx z5, z2 ; CHECK-NEXT: sdiv z5.d, p0/m, z5.d, z3.d ; CHECK-NEXT: msb z0.d, p0/m, z4.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: mls z2.d, p0/m, z5.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: srem_v4i64: @@ -1933,9 +1931,8 @@ define void @urem_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: movprfx z5, z2 ; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z3.s ; CHECK-NEXT: msb z0.s, p0/m, z4.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: mls z2.s, p0/m, z5.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: urem_v8i32: @@ -2063,9 +2060,8 @@ define void @urem_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: movprfx z5, z2 ; CHECK-NEXT: udiv z5.d, p0/m, z5.d, z3.d ; CHECK-NEXT: msb z0.d, p0/m, z4.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: mls z2.d, p0/m, z5.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: urem_v4i64: diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll index d0f99211e80fc..4cf8945575ded 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll @@ -195,9 +195,8 @@ define void @ashr_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: asr z1.b, p0/m, z1.b, z3.b -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: asr z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ashr_v32i8: @@ -476,9 +475,8 @@ define void @ashr_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: asr z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ashr_v16i16: @@ -632,9 +630,8 @@ define void @ashr_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: asrr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: asr z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ashr_v8i32: @@ -739,9 +736,8 @@ define void @ashr_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: asrr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: asr z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: asr z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: ashr_v4i64: @@ -965,9 +961,8 @@ define void @lshr_v32i8(ptr %a, ptr %b) { 
; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: lsrr z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: lsr z1.b, p0/m, z1.b, z3.b -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: lsr z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: lshr_v32i8: @@ -1246,9 +1241,8 @@ define void @lshr_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: lsrr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: lsr z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: lshr_v16i16: @@ -1402,9 +1396,8 @@ define void @lshr_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: lsrr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: lsr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: lshr_v8i32: @@ -1509,9 +1502,8 @@ define void @lshr_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: lsrr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: lsr z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: lshr_v4i64: @@ -1764,9 +1756,8 @@ define void @shl_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: lslr z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z3.b -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z3.b +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: shl_v32i8: @@ -2014,9 +2005,8 @@ define void @shl_v16i16(ptr %a, ptr %b) { ; 
CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: lslr z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: shl_v16i16: @@ -2170,9 +2160,8 @@ define void @shl_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: shl_v8i32: @@ -2277,9 +2266,8 @@ define void @shl_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: shl_v4i64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll index 74e5fe7352cfd..e9b2f539b30cc 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll @@ -954,9 +954,8 @@ define void @fadd_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fadd z2.h, p0/m, z2.h, z3.h +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fadd_v16f16: @@ -1170,9 +1169,8 @@ define void @fadd_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.s, vl4 ; 
CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fadd z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fadd_v8f32: @@ -1258,9 +1256,8 @@ define void @fadd_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: fadd z2.d, p0/m, z2.d, z3.d +; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: fadd_v4f64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll index e0e88c47fb55c..e78671aaddf18 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll @@ -526,10 +526,9 @@ define void @zip_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: zip1 z5.d, z0.d, z2.d ; CHECK-NEXT: trn2 z1.d, z1.d, z3.d ; CHECK-NEXT: trn2 z0.d, z0.d, z2.d -; CHECK-NEXT: movprfx z2, z4 -; CHECK-NEXT: fadd z2.d, p0/m, z2.d, z5.d +; CHECK-NEXT: fadd z4.d, p0/m, z4.d, z5.d ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: stp q4, q0, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip_v4f64: @@ -2159,10 +2158,9 @@ define void @zip_vscale2_4(ptr %a, ptr %b) { ; CHECK-NEXT: zip1 z5.d, z0.d, z2.d ; CHECK-NEXT: trn2 z1.d, z1.d, z3.d ; CHECK-NEXT: trn2 z0.d, z0.d, z2.d -; CHECK-NEXT: movprfx z2, z4 -; CHECK-NEXT: fadd z2.d, p0/m, z2.d, z5.d +; CHECK-NEXT: fadd z4.d, p0/m, z4.d, z5.d ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: stp q4, q0, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: zip_vscale2_4: 
diff --git a/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll b/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll index 6af26067cd6d6..0472d5c1935f5 100644 --- a/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll +++ b/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll @@ -36,10 +36,9 @@ define i32 @test( %bin.rdx, %bin.rdx2) { ; CHECK-NEXT: mla z0.s, p0/m, z25.s, z24.s ; CHECK-NEXT: mad z2.s, p0/m, z6.s, z4.s ; CHECK-NEXT: mad z1.s, p0/m, z3.s, z26.s -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: mla z3.s, p0/m, z28.s, z7.s +; CHECK-NEXT: mla z5.s, p0/m, z28.s, z7.s ; CHECK-NEXT: add z0.s, z2.s, z0.s -; CHECK-NEXT: add z1.s, z3.s, z1.s +; CHECK-NEXT: add z1.s, z5.s, z1.s ; CHECK-NEXT: add z0.s, z1.s, z0.s ; CHECK-NEXT: uaddv d0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 diff --git a/llvm/test/CodeGen/AArch64/sve2-xar.ll b/llvm/test/CodeGen/AArch64/sve2-xar.ll index 888e94d42f449..8f6f4510d8388 100644 --- a/llvm/test/CodeGen/AArch64/sve2-xar.ll +++ b/llvm/test/CodeGen/AArch64/sve2-xar.ll @@ -157,10 +157,9 @@ define @xar_nxv2i64_l_neg1( %x, %x, %y %b = call @llvm.fshl.nxv2i64( %a, %a, %z)