diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 3d45db34964471..31c6234f02d639 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14682,7 +14682,9 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// Check that N is CMPZ(CSINC(0, 0, CC, X)), return X if valid. +// Check that Cmp is CMPZ(CSINC(0, 0, CC, X)), CMPZ(CMOV(1, 0, CC, $cpsr, X)), +// or CMPZ(CMOV(0, 1, InvCC, $cpsr, X)) with CC being the opposite of InvCC. +// Sets CC and returns X on a match; returns an empty SDValue otherwise. static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1))) return SDValue(); @@ -14696,12 +14698,24 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse()) CSInc = CSInc.getOperand(0); - if (CSInc.getOpcode() != ARMISD::CSINC || - !isNullConstant(CSInc.getOperand(0)) || - !isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse()) - return SDValue(); - CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); - return CSInc.getOperand(3); + if (CSInc.getOpcode() == ARMISD::CSINC && + isNullConstant(CSInc.getOperand(0)) && + isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) { + CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); + return CSInc.getOperand(3); + } + if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) && + isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) { + CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); + return CSInc.getOperand(4); + } + if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) && + isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) { + CC = ARMCC::getOppositeCondition( + (ARMCC::CondCodes)CSInc.getConstantOperandVal(2)); + return CSInc.getOperand(4); + } + return SDValue(); } static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) { diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll 
b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll index 0ff7e0635450dc..c6db4d3ae47e45 100644 --- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll @@ -482,11 +482,9 @@ define void @test_copysign(half* %p, half* %q) { ; CHECK-NEXT: vstr.16 s0, [sp] ; CHECK-NEXT: vldr.16 s0, [r0] ; CHECK-NEXT: ldrb r1, [sp, #1] -; CHECK-NEXT: ands r1, r1, #128 ; CHECK-NEXT: vabs.f16 s0, s0 -; CHECK-NEXT: movwne r1, #1 +; CHECK-NEXT: tst r1, #128 ; CHECK-NEXT: vneg.f16 s2, s0 -; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: vseleq.f16 s0, s0, s2 ; CHECK-NEXT: vstr.16 s0, [r0] ; CHECK-NEXT: add sp, sp, #4 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 74dd9fe154aa49..a4d470b72d4ead 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -2379,67 +2379,42 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r2, r1 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: clz r7, r2 -; CHECK-NEXT: movwmi r3, #1 -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: vmov r0, r2, d9 +; CHECK-NEXT: cmn r4, #-2147483647 ; CHECK-NEXT: mvn r3, #-2147483648 +; CHECK-NEXT: movlo r3, r4 ; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: movne r3, r4 -; CHECK-NEXT: cmn r4, #-2147483647 -; CHECK-NEXT: movhs r4, r5 -; CHECK-NEXT: lsr r7, r7, #5 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movpl r4, r5 +; CHECK-NEXT: movpl r1, r6 ; CHECK-NEXT: moveq r4, r3 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movpl r2, r6 -; CHECK-NEXT: cmn r2, #1 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: add r2, r2, #1 -; CHECK-NEXT: movwgt r3, #1 -; CHECK-NEXT: clz r2, r2 -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: cmn r1, #1 ; CHECK-NEXT: mov r3, #-2147483648 -; CHECK-NEXT: movne r3, r4 ; CHECK-NEXT: 
mov r7, #-2147483648 +; CHECK-NEXT: movgt r3, r4 ; CHECK-NEXT: cmp r4, #-2147483648 -; CHECK-NEXT: lsr r2, r2, #5 ; CHECK-NEXT: movls r4, r7 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r4, r3 +; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: movne r4, r3 +; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: vmov.32 d0[0], r4 -; CHECK-NEXT: movne r2, r0 ; CHECK-NEXT: cmn r0, #-2147483647 -; CHECK-NEXT: movlo r5, r0 -; CHECK-NEXT: clz r0, r1 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r5, r2 +; CHECK-NEXT: mvn r2, #-2147483648 +; CHECK-NEXT: movlo r2, r0 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movpl r1, r6 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: add r1, r1, #1 -; CHECK-NEXT: movwgt r6, #1 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: movmi r5, r0 +; CHECK-NEXT: movmi r6, r1 +; CHECK-NEXT: moveq r5, r2 +; CHECK-NEXT: cmn r6, #1 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: movne r0, r5 +; CHECK-NEXT: vmov.32 d0[0], r4 +; CHECK-NEXT: movgt r0, r5 ; CHECK-NEXT: cmp r5, #-2147483648 ; CHECK-NEXT: movls r5, r7 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: cmn r6, #1 +; CHECK-NEXT: movne r5, r0 ; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} @@ -2485,63 +2460,45 @@ entry: define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r4, r1 -; 
CHECK-NEXT: vmov r2, r1, d8 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: movwmi r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: clz r3, r4 -; CHECK-NEXT: mvn r8, #0 -; CHECK-NEXT: movne r8, r0 +; CHECK-NEXT: vmov r2, r12, d9 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mvn r3, #0 ; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: lsr r3, r3, #5 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movne r8, r0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movpl r4, r5 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: movmi r3, r0 +; CHECK-NEXT: movpl r1, r5 +; CHECK-NEXT: moveq r3, r0 +; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: mvn r7, #0 +; CHECK-NEXT: mvn r4, #0 ; CHECK-NEXT: movwgt r6, #1 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movne r6, r8 +; CHECK-NEXT: movne r6, r3 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r6, r3 ; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r12 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov.32 d0[0], r6 +; CHECK-NEXT: movmi r4, r0 ; CHECK-NEXT: movpl r1, r5 -; CHECK-NEXT: clz r0, r1 +; CHECK-NEXT: moveq r4, r0 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: movwgt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: movne r5, r7 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: clz r0, r4 -; CHECK-NEXT: movne r5, r7 -; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r6, r8 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: movne r5, r4 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r5, r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptosi <2 x double> %x to <2 x i64> %spec.store.select = call 
<2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> ) @@ -2560,129 +2517,78 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: mov r8, #-2147483648 ; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: mov r9, #0 ; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vmov r5, s18 +; CHECK-NEXT: vmov r5, s16 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movwmi r0, #1 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cmn r0, #-2147483647 ; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: movne r0, r4 -; CHECK-NEXT: cmn r4, #-2147483647 -; CHECK-NEXT: movhs r4, r7 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: movlo r0, r4 ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movpl r4, r7 ; CHECK-NEXT: movpl r1, r9 +; CHECK-NEXT: moveq r4, r0 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: add r1, r1, #1 -; CHECK-NEXT: movwgt r0, #1 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: movne r0, r4 +; CHECK-NEXT: movgt r0, r4 ; CHECK-NEXT: cmp r4, #-2147483648 ; CHECK-NEXT: movls r4, r8 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: movne r4, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movwmi r0, #1 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cmn r0, #-2147483647 ; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: movne r0, r5 -; CHECK-NEXT: cmn r5, #-2147483647 -; CHECK-NEXT: movhs r5, r7 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: mov r2, #-2147483648 +; CHECK-NEXT: movlo r0, r5 ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movpl r5, r7 ; CHECK-NEXT: movpl r1, r9 +; 
CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #-2147483648 -; CHECK-NEXT: movwgt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: add r0, r1, #1 -; CHECK-NEXT: movne r2, r5 -; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: movgt r2, r5 ; CHECK-NEXT: cmp r5, #-2147483648 ; CHECK-NEXT: movls r5, r8 -; CHECK-NEXT: lsr r1, r0, #5 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r5, r2 +; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: movne r5, r2 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movwmi r0, #1 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: cmn r0, #-2147483647 ; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: movne r0, r6 -; CHECK-NEXT: cmn r6, #-2147483647 -; CHECK-NEXT: movhs r6, r7 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: moveq r6, r0 +; CHECK-NEXT: mov r2, #-2147483648 +; CHECK-NEXT: movlo r0, r6 ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movpl r6, r7 ; CHECK-NEXT: movpl r1, r9 +; CHECK-NEXT: moveq r6, r0 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #-2147483648 -; CHECK-NEXT: movwgt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: add r0, r1, #1 -; CHECK-NEXT: movne r2, r6 -; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: movgt r2, r6 ; CHECK-NEXT: cmp r6, #-2147483648 ; CHECK-NEXT: movls r6, r8 -; CHECK-NEXT: lsr r1, r0, #5 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r6, r2 +; CHECK-NEXT: cmn r1, #1 +; CHECK-NEXT: movne r6, r2 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: vmov.32 d0[0], r6 -; CHECK-NEXT: movne r2, r0 ; CHECK-NEXT: cmn r0, #-2147483647 -; CHECK-NEXT: movlo r7, r0 -; CHECK-NEXT: clz r0, r1 -; CHECK-NEXT: vmov.32 
d1[0], r5 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r7, r2 +; CHECK-NEXT: mvn r2, #-2147483648 +; CHECK-NEXT: movlo r2, r0 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movpl r1, r9 -; CHECK-NEXT: cmn r1, #1 -; CHECK-NEXT: add r1, r1, #1 -; CHECK-NEXT: movwgt r9, #1 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movmi r7, r0 +; CHECK-NEXT: movmi r9, r1 +; CHECK-NEXT: moveq r7, r2 +; CHECK-NEXT: cmn r9, #1 ; CHECK-NEXT: mov r0, #-2147483648 -; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: movne r0, r7 +; CHECK-NEXT: vmov.32 d1[0], r6 +; CHECK-NEXT: movgt r0, r7 ; CHECK-NEXT: cmp r7, #-2147483648 +; CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: movls r7, r8 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r7, r0 +; CHECK-NEXT: cmn r9, #1 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: movne r7, r0 ; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} @@ -2743,115 +2649,75 @@ entry: define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 +; CHECK-NEXT: mvn r9, #0 ; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r5, s16 +; CHECK-NEXT: vmov r8, s18 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: movwmi r3, #1 -; CHECK-NEXT: clz r6, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvn r3, #0 -; CHECK-NEXT: movne r3, r2 -; CHECK-NEXT: lsr r6, r6, #5 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: mvn r2, #0 +; CHECK-NEXT: movmi r2, r0 ; 
CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: movne r3, r2 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r2, r0 ; CHECK-NEXT: movpl r1, r7 ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r11, #0 -; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: movwgt r11, #1 -; CHECK-NEXT: cmp r11, #0 -; CHECK-NEXT: movne r11, r3 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: vmov r8, s16 -; CHECK-NEXT: movne r11, r3 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: mvn r10, #0 -; CHECK-NEXT: movwmi r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: clz r1, r4 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: lsr r1, r1, #5 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movne r10, r0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movpl r4, r7 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwgt r4, #1 ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movwgt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: movne r6, r10 +; CHECK-NEXT: movne r4, r2 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: moveq r4, r2 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: clz r3, r1 -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvn r2, #0 -; CHECK-NEXT: movne r2, r0 -; CHECK-NEXT: lsr r3, r3, #5 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movne r2, r0 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: movmi r2, r0 ; CHECK-NEXT: movpl r1, r7 -; CHECK-NEXT: clz r0, r1 +; CHECK-NEXT: moveq r2, r0 ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: movwgt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: movne r5, r2 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: movne r5, r2 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r5, r2 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: 
mov r2, #0 -; CHECK-NEXT: movwmi r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: clz r2, r1 -; CHECK-NEXT: movne r9, r0 -; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: lsr r2, r2, #5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r9, r0 +; CHECK-NEXT: mvn r2, #0 +; CHECK-NEXT: movmi r2, r0 +; CHECK-NEXT: movpl r1, r7 +; CHECK-NEXT: moveq r2, r0 +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwgt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: movne r6, r2 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r6, r2 +; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov.32 d1[0], r6 +; CHECK-NEXT: movmi r9, r0 ; CHECK-NEXT: movpl r1, r7 -; CHECK-NEXT: clz r0, r1 +; CHECK-NEXT: moveq r9, r0 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: movwgt r7, #1 ; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: vmov.32 d0[0], r5 ; CHECK-NEXT: movne r7, r9 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: clz r0, r4 -; CHECK-NEXT: movne r7, r9 -; CHECK-NEXT: vmov.32 d1[0], r7 -; CHECK-NEXT: lsr r0, r0, #5 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r6, r10 -; CHECK-NEXT: vmov.32 d1[1], r11 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: moveq r7, r9 +; CHECK-NEXT: vmov.32 d0[1], r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} entry: %conv = fptosi <4 x float> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2868,136 +2734,85 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} ; CHECK-NEON-NEXT: vpush {d8, d9, d10} ; CHECK-NEON-NEXT: vmov r0, s3 -; CHECK-NEON-NEXT: vmov.f32 s20, s2 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 -; CHECK-NEON-NEXT: vmov.f32 s18, s0 +; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; 
CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s20 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwmi r2, #1 -; CHECK-NEON-NEXT: clz r3, r1 -; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: cmn r4, #-2147483647 ; CHECK-NEON-NEXT: mvn r2, #-2147483648 -; CHECK-NEON-NEXT: movne r2, r4 +; CHECK-NEON-NEXT: movlo r2, r4 ; CHECK-NEON-NEXT: mvn r7, #-2147483648 -; CHECK-NEON-NEXT: cmn r4, #-2147483647 -; CHECK-NEON-NEXT: lsr r3, r3, #5 -; CHECK-NEON-NEXT: movhs r4, r7 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: moveq r4, r2 -; CHECK-NEON-NEXT: mov r9, #0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: mov r9, #0 +; CHECK-NEON-NEXT: movpl r4, r7 ; CHECK-NEON-NEXT: movpl r1, r9 +; CHECK-NEON-NEXT: moveq r4, r2 ; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: movwgt r2, #1 -; CHECK-NEON-NEXT: add r1, r1, #1 -; CHECK-NEON-NEXT: clz r1, r1 -; CHECK-NEON-NEXT: cmp r2, #0 ; CHECK-NEON-NEXT: mov r2, #-2147483648 ; CHECK-NEON-NEXT: mov r8, #-2147483648 -; CHECK-NEON-NEXT: movne r2, r4 +; CHECK-NEON-NEXT: movgt r2, r4 ; CHECK-NEON-NEXT: cmp r4, #-2147483648 ; CHECK-NEON-NEXT: movls r4, r8 -; CHECK-NEON-NEXT: lsr r1, r1, #5 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r4, r2 +; CHECK-NEON-NEXT: cmn r1, #1 +; CHECK-NEON-NEXT: movne r4, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: clz r2, r1 -; CHECK-NEON-NEXT: movwmi r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: cmn r0, #-2147483647 ; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: lsr r2, r2, #5 -; CHECK-NEON-NEXT: movne r0, r5 -; CHECK-NEON-NEXT: cmn r5, #-2147483647 -; CHECK-NEON-NEXT: movhs r5, r7 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: moveq r5, r0 +; CHECK-NEON-NEXT: mov r2, #-2147483648 +; 
CHECK-NEON-NEXT: movlo r0, r5 ; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: movpl r5, r7 ; CHECK-NEON-NEXT: movpl r1, r9 +; CHECK-NEON-NEXT: moveq r5, r0 +; CHECK-NEON-NEXT: vmov r0, s18 ; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: mov r2, #-2147483648 -; CHECK-NEON-NEXT: movwgt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: add r0, r1, #1 -; CHECK-NEON-NEXT: movne r2, r5 -; CHECK-NEON-NEXT: clz r0, r0 +; CHECK-NEON-NEXT: movgt r2, r5 ; CHECK-NEON-NEXT: cmp r5, #-2147483648 ; CHECK-NEON-NEXT: movls r5, r8 -; CHECK-NEON-NEXT: lsr r1, r0, #5 -; CHECK-NEON-NEXT: vmov r0, s18 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r5, r2 +; CHECK-NEON-NEXT: cmn r1, #1 +; CHECK-NEON-NEXT: movne r5, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r6, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: clz r2, r1 -; CHECK-NEON-NEXT: movwmi r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: cmn r0, #-2147483647 ; CHECK-NEON-NEXT: mvn r0, #-2147483648 -; CHECK-NEON-NEXT: lsr r2, r2, #5 -; CHECK-NEON-NEXT: movne r0, r6 -; CHECK-NEON-NEXT: cmn r6, #-2147483647 -; CHECK-NEON-NEXT: movhs r6, r7 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: moveq r6, r0 +; CHECK-NEON-NEXT: mov r2, #-2147483648 +; CHECK-NEON-NEXT: movlo r0, r6 ; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: movpl r6, r7 ; CHECK-NEON-NEXT: movpl r1, r9 +; CHECK-NEON-NEXT: moveq r6, r0 +; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: mov r2, #-2147483648 -; CHECK-NEON-NEXT: movwgt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: add r0, r1, #1 -; CHECK-NEON-NEXT: movne r2, r6 -; CHECK-NEON-NEXT: clz r0, r0 +; CHECK-NEON-NEXT: movgt r2, r6 ; CHECK-NEON-NEXT: cmp r6, #-2147483648 ; CHECK-NEON-NEXT: movls r6, r8 -; CHECK-NEON-NEXT: lsr r1, r0, #5 -; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: cmp 
r1, #0 -; CHECK-NEON-NEXT: moveq r6, r2 +; CHECK-NEON-NEXT: cmn r1, #1 +; CHECK-NEON-NEXT: movne r6, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwmi r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: mvn r2, #-2147483648 -; CHECK-NEON-NEXT: vmov.32 d0[0], r6 -; CHECK-NEON-NEXT: movne r2, r0 ; CHECK-NEON-NEXT: cmn r0, #-2147483647 -; CHECK-NEON-NEXT: movlo r7, r0 -; CHECK-NEON-NEXT: clz r0, r1 -; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: lsr r0, r0, #5 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: moveq r7, r2 +; CHECK-NEON-NEXT: mvn r2, #-2147483648 +; CHECK-NEON-NEXT: movlo r2, r0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r1, r9 -; CHECK-NEON-NEXT: cmn r1, #1 -; CHECK-NEON-NEXT: add r1, r1, #1 -; CHECK-NEON-NEXT: movwgt r9, #1 -; CHECK-NEON-NEXT: clz r1, r1 -; CHECK-NEON-NEXT: cmp r9, #0 +; CHECK-NEON-NEXT: movmi r7, r0 +; CHECK-NEON-NEXT: movmi r9, r1 +; CHECK-NEON-NEXT: moveq r7, r2 +; CHECK-NEON-NEXT: cmn r9, #1 ; CHECK-NEON-NEXT: mov r0, #-2147483648 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: movne r0, r7 +; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: movgt r0, r7 ; CHECK-NEON-NEXT: cmp r7, #-2147483648 +; CHECK-NEON-NEXT: vmov.32 d0[0], r5 ; CHECK-NEON-NEXT: movls r7, r8 -; CHECK-NEON-NEXT: lsr r1, r1, #5 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: moveq r7, r0 +; CHECK-NEON-NEXT: cmn r9, #1 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: movne r7, r0 ; CHECK-NEON-NEXT: vmov.32 d0[1], r7 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} @@ -3013,131 +2828,80 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: mov r4, r0 -; CHECK-FP16-NEXT: vmov.u16 r0, d8[0] -; CHECK-FP16-NEXT: vmov.u16 r2, d8[2] -; CHECK-FP16-NEXT: cmp r1, #0 +; 
CHECK-FP16-NEXT: vmov.u16 r0, d8[2] +; CHECK-FP16-NEXT: vmov.u16 r2, d8[0] +; CHECK-FP16-NEXT: cmn r4, #-2147483647 ; CHECK-FP16-NEXT: mvn r7, #-2147483648 ; CHECK-FP16-NEXT: mov r9, #0 ; CHECK-FP16-NEXT: mov r8, #-2147483648 ; CHECK-FP16-NEXT: vmov s18, r0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: movwmi r0, #1 -; CHECK-FP16-NEXT: vmov s0, r2 -; CHECK-FP16-NEXT: clz r2, r1 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: movne r0, r4 -; CHECK-FP16-NEXT: cmn r4, #-2147483647 -; CHECK-FP16-NEXT: movhs r4, r7 -; CHECK-FP16-NEXT: lsr r2, r2, #5 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: moveq r4, r0 +; CHECK-FP16-NEXT: movlo r0, r4 ; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: movpl r4, r7 ; CHECK-FP16-NEXT: movpl r1, r9 +; CHECK-FP16-NEXT: moveq r4, r0 ; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: add r1, r1, #1 -; CHECK-FP16-NEXT: movwgt r0, #1 -; CHECK-FP16-NEXT: clz r1, r1 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movne r0, r4 +; CHECK-FP16-NEXT: vmov s0, r2 +; CHECK-FP16-NEXT: movgt r0, r4 ; CHECK-FP16-NEXT: cmp r4, #-2147483648 ; CHECK-FP16-NEXT: movls r4, r8 -; CHECK-FP16-NEXT: lsr r1, r1, #5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r4, r0 +; CHECK-FP16-NEXT: cmn r1, #1 +; CHECK-FP16-NEXT: movne r4, r0 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.f32 s0, s18 ; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: clz r2, r1 -; CHECK-FP16-NEXT: movwmi r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: cmn r0, #-2147483647 ; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: vmov.f32 s0, s18 -; CHECK-FP16-NEXT: movne r0, r5 -; CHECK-FP16-NEXT: cmn r5, #-2147483647 -; CHECK-FP16-NEXT: lsr r2, r2, #5 -; CHECK-FP16-NEXT: movhs r5, r7 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: moveq r5, r0 +; CHECK-FP16-NEXT: movlo r0, 
r5 ; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: movpl r5, r7 ; CHECK-FP16-NEXT: movpl r1, r9 +; CHECK-FP16-NEXT: moveq r5, r0 ; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: add r1, r1, #1 -; CHECK-FP16-NEXT: movwgt r0, #1 -; CHECK-FP16-NEXT: clz r1, r1 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movne r0, r5 +; CHECK-FP16-NEXT: movgt r0, r5 ; CHECK-FP16-NEXT: cmp r5, #-2147483648 ; CHECK-FP16-NEXT: movls r5, r8 -; CHECK-FP16-NEXT: lsr r1, r1, #5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r5, r0 +; CHECK-FP16-NEXT: cmn r1, #1 +; CHECK-FP16-NEXT: movne r5, r0 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] ; CHECK-FP16-NEXT: mov r6, r0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: clz r2, r1 -; CHECK-FP16-NEXT: movwmi r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: cmn r0, #-2147483647 ; CHECK-FP16-NEXT: mvn r0, #-2147483648 -; CHECK-FP16-NEXT: lsr r2, r2, #5 -; CHECK-FP16-NEXT: movne r0, r6 -; CHECK-FP16-NEXT: cmn r6, #-2147483647 -; CHECK-FP16-NEXT: movhs r6, r7 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] -; CHECK-FP16-NEXT: moveq r6, r0 +; CHECK-FP16-NEXT: movlo r0, r6 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: movpl r6, r7 ; CHECK-FP16-NEXT: movpl r1, r9 +; CHECK-FP16-NEXT: moveq r6, r0 ; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: movwgt r0, #1 -; CHECK-FP16-NEXT: add r1, r1, #1 -; CHECK-FP16-NEXT: clz r1, r1 -; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: movne r0, r6 +; CHECK-FP16-NEXT: movgt r0, r6 ; CHECK-FP16-NEXT: cmp r6, #-2147483648 ; CHECK-FP16-NEXT: movls r6, r8 -; CHECK-FP16-NEXT: lsr r1, r1, #5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r6, r0 +; CHECK-FP16-NEXT: cmn r1, #1 +; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: vmov s0, r2 ; CHECK-FP16-NEXT: bl 
__fixhfdi -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwmi r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: mvn r2, #-2147483648 -; CHECK-FP16-NEXT: vmov.32 d0[0], r6 -; CHECK-FP16-NEXT: movne r2, r0 ; CHECK-FP16-NEXT: cmn r0, #-2147483647 -; CHECK-FP16-NEXT: movlo r7, r0 -; CHECK-FP16-NEXT: clz r0, r1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r5 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: moveq r7, r2 +; CHECK-FP16-NEXT: mvn r2, #-2147483648 +; CHECK-FP16-NEXT: movlo r2, r0 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r1, r9 -; CHECK-FP16-NEXT: cmn r1, #1 -; CHECK-FP16-NEXT: add r1, r1, #1 -; CHECK-FP16-NEXT: movwgt r9, #1 -; CHECK-FP16-NEXT: clz r1, r1 -; CHECK-FP16-NEXT: cmp r9, #0 +; CHECK-FP16-NEXT: movmi r7, r0 +; CHECK-FP16-NEXT: movmi r9, r1 +; CHECK-FP16-NEXT: moveq r7, r2 +; CHECK-FP16-NEXT: cmn r9, #1 ; CHECK-FP16-NEXT: mov r0, #-2147483648 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: movne r0, r7 +; CHECK-FP16-NEXT: vmov.32 d1[0], r6 +; CHECK-FP16-NEXT: movgt r0, r7 ; CHECK-FP16-NEXT: cmp r7, #-2147483648 +; CHECK-FP16-NEXT: vmov.32 d0[0], r5 ; CHECK-FP16-NEXT: movls r7, r8 -; CHECK-FP16-NEXT: lsr r1, r1, #5 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: moveq r7, r0 +; CHECK-FP16-NEXT: cmn r9, #1 +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 +; CHECK-FP16-NEXT: movne r7, r0 ; CHECK-FP16-NEXT: vmov.32 d0[1], r7 ; CHECK-FP16-NEXT: vpop {d8, d9} ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} @@ -3247,233 +3011,157 @@ entry: define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: .pad #4 -; CHECK-NEON-NEXT: sub sp, sp, #4 +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, 
r11, lr} ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} ; CHECK-NEON-NEXT: vpush {d8, d9, d10} ; CHECK-NEON-NEXT: vmov r0, s3 -; CHECK-NEON-NEXT: vmov.f32 s16, s2 -; CHECK-NEON-NEXT: vmov.f32 s18, s1 +; CHECK-NEON-NEXT: vmov.f32 s18, s2 +; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r2, r0 -; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: vmov r2, s20 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r3, #0 -; CHECK-NEON-NEXT: movwmi r3, #1 -; CHECK-NEON-NEXT: clz r6, r1 -; CHECK-NEON-NEXT: cmp r3, #0 ; CHECK-NEON-NEXT: mvn r3, #0 -; CHECK-NEON-NEXT: movne r3, r2 -; CHECK-NEON-NEXT: lsr r6, r6, #5 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: movne r3, r2 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r1, r7 +; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: movmi r3, r0 +; CHECK-NEON-NEXT: movpl r1, r6 +; CHECK-NEON-NEXT: moveq r3, r0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r11, #0 -; CHECK-NEON-NEXT: clz r1, r1 -; CHECK-NEON-NEXT: movwgt r11, #1 -; CHECK-NEON-NEXT: cmp r11, #0 -; CHECK-NEON-NEXT: movne r11, r3 -; CHECK-NEON-NEXT: lsr r1, r1, #5 +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: vmov r8, s18 +; CHECK-NEON-NEXT: movwgt r7, #1 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: movne r7, r3 ; CHECK-NEON-NEXT: cmp r1, #0 ; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: vmov r8, s20 -; CHECK-NEON-NEXT: movne r11, r3 +; CHECK-NEON-NEXT: moveq r7, r3 +; CHECK-NEON-NEXT: mov r0, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r1, #0 -; CHECK-NEON-NEXT: mvn r10, #0 -; CHECK-NEON-NEXT: movwmi r1, #1 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: clz r1, r4 -; CHECK-NEON-NEXT: movne r10, r0 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: lsr r1, r1, #5 +; 
CHECK-NEON-NEXT: mvn r2, #0 +; CHECK-NEON-NEXT: movmi r2, r0 +; CHECK-NEON-NEXT: movpl r1, r6 +; CHECK-NEON-NEXT: moveq r2, r0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movne r10, r0 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movpl r4, r7 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movwgt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: mov r4, #0 ; CHECK-NEON-NEXT: mov r0, r8 -; CHECK-NEON-NEXT: movne r6, r10 +; CHECK-NEON-NEXT: movwgt r4, #1 +; CHECK-NEON-NEXT: cmp r4, #0 +; CHECK-NEON-NEXT: movne r4, r2 +; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: moveq r4, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwmi r2, #1 -; CHECK-NEON-NEXT: clz r3, r1 -; CHECK-NEON-NEXT: cmp r2, #0 ; CHECK-NEON-NEXT: mvn r2, #0 -; CHECK-NEON-NEXT: movne r2, r0 -; CHECK-NEON-NEXT: lsr r3, r3, #5 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: movne r2, r0 -; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r1, r7 -; CHECK-NEON-NEXT: clz r0, r1 +; CHECK-NEON-NEXT: movmi r2, r0 +; CHECK-NEON-NEXT: movpl r1, r6 +; CHECK-NEON-NEXT: moveq r2, r0 +; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: movwgt r5, #1 ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: lsr r0, r0, #5 -; CHECK-NEON-NEXT: movne r5, r2 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: movne r5, r2 +; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: moveq r5, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwmi r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: clz r2, r1 -; CHECK-NEON-NEXT: movne r9, r0 -; CHECK-NEON-NEXT: vmov.32 d0[0], r5 -; CHECK-NEON-NEXT: lsr r2, r2, #5 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: movne r9, 
r0 +; CHECK-NEON-NEXT: vmov.32 d1[0], r5 +; CHECK-NEON-NEXT: movmi r9, r0 +; CHECK-NEON-NEXT: movpl r1, r6 +; CHECK-NEON-NEXT: moveq r9, r0 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movpl r1, r7 -; CHECK-NEON-NEXT: clz r0, r1 +; CHECK-NEON-NEXT: movwgt r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: vmov.32 d0[0], r4 +; CHECK-NEON-NEXT: movne r6, r9 ; CHECK-NEON-NEXT: cmp r1, #0 -; CHECK-NEON-NEXT: movwgt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: movne r7, r9 -; CHECK-NEON-NEXT: lsr r0, r0, #5 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: clz r0, r4 -; CHECK-NEON-NEXT: movne r7, r9 -; CHECK-NEON-NEXT: vmov.32 d1[0], r7 -; CHECK-NEON-NEXT: lsr r0, r0, #5 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: movne r6, r10 -; CHECK-NEON-NEXT: vmov.32 d1[1], r11 +; CHECK-NEON-NEXT: vmov.32 d1[1], r7 +; CHECK-NEON-NEXT: moveq r6, r9 ; CHECK-NEON-NEXT: vmov.32 d0[1], r6 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} -; CHECK-NEON-NEXT: add sp, sp, #4 -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-FP16-LABEL: ustest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-FP16-NEXT: .vsave {d8, d9} ; CHECK-FP16-NEXT: vpush {d8, d9} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r4, d0[1] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov.u16 r2, d8[0] +; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: clz r3, r1 +; CHECK-FP16-NEXT: vmov.u16 r7, d8[0] +; CHECK-FP16-NEXT: mov r5, #0 +; CHECK-FP16-NEXT: vmov.u16 r3, d8[2] +; CHECK-FP16-NEXT: movpl r1, r5 ; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: mov r10, #0 -; 
CHECK-FP16-NEXT: vmov s0, r4 -; CHECK-FP16-NEXT: lsr r3, r3, #5 ; CHECK-FP16-NEXT: mvn r8, #0 -; CHECK-FP16-NEXT: vmov s18, r2 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwmi r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: vmov s16, r2 ; CHECK-FP16-NEXT: mvn r2, #0 -; CHECK-FP16-NEXT: movne r2, r0 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: movne r2, r0 +; CHECK-FP16-NEXT: movmi r2, r0 +; CHECK-FP16-NEXT: vmov s0, r7 +; CHECK-FP16-NEXT: moveq r2, r0 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r1, r6 -; CHECK-FP16-NEXT: clz r0, r1 +; CHECK-FP16-NEXT: movwgt r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: movne r6, r2 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movwgt r10, #1 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: movne r10, r2 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r10, r2 +; CHECK-FP16-NEXT: vmov s18, r3 +; CHECK-FP16-NEXT: moveq r6, r2 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r1, #0 ; CHECK-FP16-NEXT: vmov.f32 s0, s18 -; CHECK-FP16-NEXT: movwmi r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: clz r1, r4 -; CHECK-FP16-NEXT: mvn r9, #0 -; CHECK-FP16-NEXT: movne r9, r0 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: lsr r1, r1, #5 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movne r9, r0 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movpl r4, r6 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movwgt r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: movne r5, r9 -; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwmi r2, #1 -; CHECK-FP16-NEXT: clz r3, r1 -; CHECK-FP16-NEXT: cmp r2, #0 ; CHECK-FP16-NEXT: mvn r2, #0 -; CHECK-FP16-NEXT: movne r2, r0 -; CHECK-FP16-NEXT: lsr r3, r3, #5 -; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: movpl r1, r5 +; CHECK-FP16-NEXT: movmi r2, r0 
; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: movne r2, r0 +; CHECK-FP16-NEXT: moveq r2, r0 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r1, r6 -; CHECK-FP16-NEXT: clz r0, r1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: vmov.u16 r1, d8[2] ; CHECK-FP16-NEXT: movwgt r7, #1 ; CHECK-FP16-NEXT: cmp r7, #0 ; CHECK-FP16-NEXT: movne r7, r2 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r7, r2 -; CHECK-FP16-NEXT: vmov s0, r1 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: moveq r7, r2 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.f32 s0, s16 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwmi r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: clz r2, r1 -; CHECK-FP16-NEXT: movne r8, r0 -; CHECK-FP16-NEXT: vmov.32 d0[0], r7 -; CHECK-FP16-NEXT: lsr r2, r2, #5 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: movne r8, r0 +; CHECK-FP16-NEXT: mvn r2, #0 +; CHECK-FP16-NEXT: movpl r1, r5 +; CHECK-FP16-NEXT: movmi r2, r0 +; CHECK-FP16-NEXT: mov r4, #0 +; CHECK-FP16-NEXT: moveq r2, r0 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movpl r1, r6 -; CHECK-FP16-NEXT: clz r0, r1 +; CHECK-FP16-NEXT: movwgt r4, #1 +; CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: movne r4, r2 ; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: movwgt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: movne r6, r8 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: clz r0, r4 -; CHECK-FP16-NEXT: movne r6, r8 -; CHECK-FP16-NEXT: vmov.32 d1[0], r6 -; CHECK-FP16-NEXT: lsr r0, r0, #5 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r5, r9 -; CHECK-FP16-NEXT: vmov.32 d1[1], r10 +; CHECK-FP16-NEXT: moveq r4, r2 +; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: vmov.32 d1[0], r4 +; CHECK-FP16-NEXT: movmi r8, r0 +; CHECK-FP16-NEXT: movpl r1, r5 +; CHECK-FP16-NEXT: moveq r8, r0 +; CHECK-FP16-NEXT: cmp r1, #0 
+; CHECK-FP16-NEXT: movwgt r5, #1 +; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: vmov.32 d0[0], r7 +; CHECK-FP16-NEXT: movne r5, r8 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: vmov.32 d1[1], r6 +; CHECK-FP16-NEXT: moveq r5, r8 ; CHECK-FP16-NEXT: vmov.32 d0[1], r5 ; CHECK-FP16-NEXT: vpop {d8, d9} -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll index f63477e038264c..a5f9c511e0680a 100644 --- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll +++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll @@ -15,19 +15,15 @@ define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) { ; CHECK-NEXT: adds r6, r0, #1 ; CHECK-NEXT: adc r4, r4, #0 ; CHECK-NEXT: subs.w r0, lr, #-1 -; CHECK-NEXT: sbcs r0, r12, #0 ; CHECK-NEXT: vmov q1[2], q1[0], lr, r6 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: sbcs r0, r12, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r12, r4 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm r12, ne +; CHECK-NEXT: csetm r12, lo ; CHECK-NEXT: subs.w r6, r6, #-1 -; CHECK-NEXT: sbcs r6, r4, #0 ; CHECK-NEXT: bfi r5, r12, #0, #8 -; CHECK-NEXT: cset r6, lo -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csetm r6, ne +; CHECK-NEXT: sbcs r6, r4, #0 +; CHECK-NEXT: mov.w r0, #0 +; CHECK-NEXT: csetm r6, lo ; CHECK-NEXT: bfi r5, r6, #8, #8 ; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: vpsel q1, q1, q0 @@ -38,17 +34,13 @@ define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) { ; CHECK-NEXT: subs r1, r6, r1 ; CHECK-NEXT: sbcs.w r1, r5, r4 ; CHECK-NEXT: vmov r5, r4, d1 -; CHECK-NEXT: cset r1, lo +; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: vldr d1, [sp, #16] -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne ; CHECK-NEXT: bfi r0, r1, #0, #8 ; 
CHECK-NEXT: vmov r1, r6, d3 ; CHECK-NEXT: subs r1, r1, r5 ; CHECK-NEXT: sbcs.w r1, r6, r4 -; CHECK-NEXT: cset r1, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lo ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: add r0, sp, #24 diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index 35b418f692265c..08bcba9b5cd7d7 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -23,37 +23,29 @@ define arm_aapcs_vfpcc <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-NEXT: subs.w r3, r4, r12 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: subs.w r0, r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: cset r0, lt ; CHECK-NEXT: mov.w r12, #-1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: adr r4, .LCPI0_1 ; CHECK-NEXT: bfi r5, r0, #8, #8 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: adr r4, .LCPI0_1 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #0, #8 ; CHECK-NEXT: rsbs.w r0, r3, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r5 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; 
CHECK-NEXT: vpsel q0, q0, q1 @@ -99,18 +91,14 @@ define arm_aapcs_vfpcc <2 x i32> @utest_f64i32(<2 x double> %x) { ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r3, lo -; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r2, r3, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmov.i64 q0, #0xffffffff ; CHECK-NEXT: bfi r2, r0, #8, #8 +; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vpop {d8, d9} @@ -141,35 +129,27 @@ define arm_aapcs_vfpcc <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r5, r0, #8, #8 +; CHECK-NEXT: vmov.i64 q0, #0xffffffff ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r2, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: rsbs r1, r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r5 ; CHECK-NEXT: bfi r2, r0, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: 
cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -220,21 +200,17 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmov.i64 q5, #0xffffffff ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov r0, r4, d8 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 ; CHECK-NEXT: vmsr p0, r3 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: vpsel q6, q0, q5 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r5, r0 @@ -245,15 +221,11 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r7, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r7, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r7 ; CHECK-NEXT: vpsel q0, q0, q5 @@ -368,20 +340,16 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: sbcs.w r2, r6, r5 ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.i32 q5, #0x0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, 
lt ; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r0, r6, r1 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov.u16 r0, q4[0] +; CHECK-NEXT: vmov.i32 q5, #0x0 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q6, q0, q5 ; CHECK-NEXT: bl __fixhfdi @@ -393,15 +361,11 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 ; CHECK-NEXT: sbcs.w r2, r6, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r6, r1 ; CHECK-NEXT: bfi r6, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r6, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: vpsel q0, q0, q5 @@ -450,16 +414,12 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16(<2 x double> %x) { ; CHECK-NEXT: subs r1, r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 ; CHECK-NEXT: adr r4, .LCPI9_1 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -468,15 +428,11 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16(<2 x double> %x) { ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: subs.w r1, lr, r1 ; CHECK-NEXT: sbcs.w r1, r12, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, 
lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: subs.w r1, lr, r3 ; CHECK-NEXT: sbcs.w r1, r12, r5 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -527,15 +483,11 @@ define arm_aapcs_vfpcc <2 x i16> @utest_f64i16(<2 x double> %x) { ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: subs r0, r0, r4 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #0, #8 ; CHECK-NEXT: subs r0, r2, r4 ; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -573,15 +525,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NEXT: subs r1, r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -590,15 +538,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: sbcs.w r1, r0, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: rsbs r2, r3, #0 ; CHECK-NEXT: sbcs.w r2, r0, r5 ; CHECK-NEXT: bfi r0, r1, #0, #8 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: 
vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1299,37 +1243,29 @@ define arm_aapcs_vfpcc <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: subs.w r3, r4, r12 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: subs.w r0, r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: cset r0, lt ; CHECK-NEXT: mov.w r12, #-1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: adr r4, .LCPI27_1 ; CHECK-NEXT: bfi r5, r0, #8, #8 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: adr r4, .LCPI27_1 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #0, #8 ; CHECK-NEXT: rsbs.w r0, r3, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r5 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1373,18 +1309,14 @@ define arm_aapcs_vfpcc <2 x i32> @utest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r3, lo -; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r2, r3, #0, #8 -; 
CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmov.i64 q0, #0xffffffff ; CHECK-NEXT: bfi r2, r0, #8, #8 +; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vpop {d8, d9} @@ -1414,35 +1346,27 @@ define arm_aapcs_vfpcc <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov q1[2], q1[0], r4, r0 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r1 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r5, r0, #8, #8 +; CHECK-NEXT: vmov.i64 q0, #0xffffffff ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r2, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: rsbs r1, r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r5 ; CHECK-NEXT: bfi r2, r0, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1489,21 +1413,17 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: 
csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo +; CHECK-NEXT: vmov.i64 q5, #0xffffffff ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov r0, r4, d8 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 ; CHECK-NEXT: vmsr p0, r3 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: vpsel q6, q0, q5 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r5, r0 @@ -1514,15 +1434,11 @@ define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r5, r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r6, r1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r7, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r7, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r7 ; CHECK-NEXT: vpsel q0, q0, q5 @@ -1631,20 +1547,16 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: sbcs.w r2, r6, r5 ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.i32 q5, #0x0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r0, r6, r1 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov.u16 r0, q4[0] +; CHECK-NEXT: vmov.i32 q5, #0x0 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q6, q0, q5 ; 
CHECK-NEXT: bl __fixhfdi @@ -1656,15 +1568,11 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 ; CHECK-NEXT: sbcs.w r2, r6, r5 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r1 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r6, r1 ; CHECK-NEXT: bfi r6, r2, #0, #8 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r6, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: vpsel q0, q0, q5 @@ -1711,16 +1619,12 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: subs r1, r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 ; CHECK-NEXT: adr r4, .LCPI36_1 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1729,15 +1633,11 @@ define arm_aapcs_vfpcc <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: subs.w r1, lr, r1 ; CHECK-NEXT: sbcs.w r1, r12, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: subs.w r1, lr, r3 ; CHECK-NEXT: sbcs.w r1, r12, r5 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1786,15 +1686,11 @@ define arm_aapcs_vfpcc <2 x i16> @utest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: subs r0, r0, r4 ; 
CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #0, #8 ; CHECK-NEXT: subs r0, r2, r4 ; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r5, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1831,15 +1727,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: subs r1, r1, r4 ; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: subs r1, r3, r4 ; CHECK-NEXT: sbcs r1, r5, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r2, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1848,15 +1740,11 @@ define arm_aapcs_vfpcc <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: sbcs.w r1, r0, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: rsbs r2, r3, #0 ; CHECK-NEXT: sbcs.w r2, r0, r5 ; CHECK-NEXT: bfi r0, r1, #0, #8 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: vpsel q0, q0, q1 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll index f92c575b7d2211..c3d4276c712c67 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -95,55 +95,47 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: ldrd 
r12, lr, [r1] ; CHECK-LE-NEXT: movs r3, #0 -; CHECK-LE-NEXT: @ implicit-def: $q0 +; CHECK-LE-NEXT: @ implicit-def: $q1 ; CHECK-LE-NEXT: rsbs.w r1, r12, #0 -; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr +; CHECK-LE-NEXT: vmov q0[2], q0[0], r12, lr ; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 ; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 ; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: bfi r3, r1, #1, #1 ; CHECK-LE-NEXT: lsls r1, r3, #31 ; CHECK-LE-NEXT: itt ne ; CHECK-LE-NEXT: ldrne r1, [r2] -; CHECK-LE-NEXT: vmovne.32 q0[0], r1 +; CHECK-LE-NEXT: vmovne.32 q1[0], r1 ; CHECK-LE-NEXT: lsls r1, r3, #30 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] -; CHECK-LE-NEXT: vmovmi.32 q0[2], r1 -; CHECK-LE-NEXT: vmov r2, s2 +; CHECK-LE-NEXT: vmovmi.32 q1[2], r1 +; CHECK-LE-NEXT: vmov r2, s6 ; CHECK-LE-NEXT: movs r1, #0 -; CHECK-LE-NEXT: vmov r3, s4 -; CHECK-LE-NEXT: vmov r4, s0 -; CHECK-LE-NEXT: vmov q0[2], q0[0], r4, r2 +; CHECK-LE-NEXT: vmov r3, s0 +; CHECK-LE-NEXT: vmov r4, s4 +; CHECK-LE-NEXT: vmov q1[2], q1[0], r4, r2 ; CHECK-LE-NEXT: rsbs r5, r3, #0 ; CHECK-LE-NEXT: asr.w r12, r2, #31 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3, asr #31 -; CHECK-LE-NEXT: vmov r3, s6 -; CHECK-LE-NEXT: cset r2, lt +; CHECK-LE-NEXT: vmov r3, s2 +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: asr.w lr, r4, #31 -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: vmov q0[3], q0[1], lr, r12 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: vmov q1[3], q1[1], lr, r12 ; CHECK-LE-NEXT: rsbs r5, r3, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, 
#1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: it ne -; CHECK-LE-NEXT: vstrne d0, [r0] +; CHECK-LE-NEXT: vstrne d2, [r0] ; CHECK-LE-NEXT: lsls r1, r1, #30 ; CHECK-LE-NEXT: it mi -; CHECK-LE-NEXT: vstrmi d1, [r0, #8] +; CHECK-LE-NEXT: vstrmi d3, [r0, #8] ; CHECK-LE-NEXT: add sp, #4 ; CHECK-LE-NEXT: pop {r4, r5, r7, pc} ; @@ -157,17 +149,13 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, lr, asr #31 -; CHECK-BE-NEXT: cset r3, lt ; CHECK-BE-NEXT: vmov q0[3], q0[1], r12, lr -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: @ implicit-def: $q2 -; CHECK-BE-NEXT: csetm lr, ne +; CHECK-BE-NEXT: csetm lr, lt ; CHECK-BE-NEXT: rsbs.w r3, r12, #0 +; CHECK-BE-NEXT: @ implicit-def: $q2 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12, asr #31 ; CHECK-BE-NEXT: bfi r1, lr, #0, #1 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: csetm r3, ne +; CHECK-BE-NEXT: csetm r3, lt ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: lsls r3, r1, #30 ; CHECK-BE-NEXT: bpl .LBB5_2 @@ -198,16 +186,12 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-BE-NEXT: sbcs.w r4, r1, r4, asr #31 ; CHECK-BE-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-BE-NEXT: vmov r3, s9 -; CHECK-BE-NEXT: cset r2, lt +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne ; CHECK-BE-NEXT: rsbs r5, r3, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: it mi @@ -239,15 +223,11 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: rsbs.w r1, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], 
r12, lr ; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 ; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 ; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: bfi r3, r1, #1, #1 ; CHECK-LE-NEXT: lsls r1, r3, #31 ; CHECK-LE-NEXT: itt ne @@ -266,17 +246,13 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: asr.w r12, r2, #31 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3, asr #31 ; CHECK-LE-NEXT: vmov r3, s6 -; CHECK-LE-NEXT: cset r2, lt +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: asr.w lr, r4, #31 -; CHECK-LE-NEXT: cmp r2, #0 ; CHECK-LE-NEXT: vmov q0[3], q0[1], lr, r12 -; CHECK-LE-NEXT: csetm r2, ne ; CHECK-LE-NEXT: rsbs r5, r3, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: itt ne @@ -299,17 +275,13 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, lr, asr #31 -; CHECK-BE-NEXT: cset r3, lt ; CHECK-BE-NEXT: vmov q0[3], q0[1], r12, lr -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: @ implicit-def: $q2 -; CHECK-BE-NEXT: csetm lr, ne +; CHECK-BE-NEXT: csetm lr, lt ; CHECK-BE-NEXT: rsbs.w r3, r12, #0 +; CHECK-BE-NEXT: @ implicit-def: $q2 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12, asr #31 ; CHECK-BE-NEXT: bfi r1, lr, #0, #1 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: csetm r3, ne +; CHECK-BE-NEXT: csetm r3, lt ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: lsls r3, r1, 
#30 ; CHECK-BE-NEXT: bpl .LBB6_2 @@ -340,16 +312,12 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-BE-NEXT: sbcs.w r4, r1, r4, asr #31 ; CHECK-BE-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-BE-NEXT: vmov r3, s9 -; CHECK-BE-NEXT: cset r2, lt +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne ; CHECK-BE-NEXT: rsbs r5, r3, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: itt mi @@ -384,15 +352,11 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: rsbs.w r1, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr ; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 ; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 ; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: bfi r3, r1, #1, #1 ; CHECK-LE-NEXT: lsls r1, r3, #31 ; CHECK-LE-NEXT: itt ne @@ -408,15 +372,11 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-LE-NEXT: rsbs r3, r2, #0 ; CHECK-LE-NEXT: vmov r3, s6 ; CHECK-LE-NEXT: sbcs.w r2, r1, r2, asr #31 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs r4, r3, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, 
r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: it ne @@ -437,51 +397,43 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, lr, asr #31 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: vmov q0[3], q0[1], r12, lr -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: csetm lr, ne +; CHECK-BE-NEXT: vmov q1[3], q1[1], r12, lr +; CHECK-BE-NEXT: csetm lr, lt ; CHECK-BE-NEXT: rsbs.w r3, r12, #0 +; CHECK-BE-NEXT: @ implicit-def: $q0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12, asr #31 ; CHECK-BE-NEXT: bfi r1, lr, #0, #1 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: csetm r3, ne +; CHECK-BE-NEXT: csetm r3, lt ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: lsls r3, r1, #30 ; CHECK-BE-NEXT: bpl .LBB7_2 ; CHECK-BE-NEXT: @ %bb.1: @ %cond.load ; CHECK-BE-NEXT: ldr r3, [r2] ; CHECK-BE-NEXT: vmov.32 q2[1], r3 -; CHECK-BE-NEXT: vrev64.32 q1, q2 +; CHECK-BE-NEXT: vrev64.32 q0, q2 ; CHECK-BE-NEXT: .LBB7_2: @ %else -; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vrev64.32 q2, q1 ; CHECK-BE-NEXT: lsls r1, r1, #31 ; CHECK-BE-NEXT: beq .LBB7_4 ; CHECK-BE-NEXT: @ %bb.3: @ %cond.load1 ; CHECK-BE-NEXT: ldr r1, [r2, #4] -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: vmov.32 q0[3], r1 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.32 q1[3], r1 +; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: .LBB7_4: @ %else2 ; CHECK-BE-NEXT: vrev64.32 q3, q2 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vmov r2, s15 -; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-BE-NEXT: vand q0, q1, q0 +; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-BE-NEXT: vand q0, q0, q1 ; CHECK-BE-NEXT: rsbs r3, r2, #0 ; CHECK-BE-NEXT: vmov r3, s13 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r12, ne +; CHECK-BE-NEXT: csetm 
r12, lt ; CHECK-BE-NEXT: rsbs r2, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r3, asr #31 ; CHECK-BE-NEXT: bfi r1, r12, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: it mi @@ -514,15 +466,11 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: rsbs.w r1, r12, #0 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr ; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: rsbs.w r4, lr, #0 ; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31 ; CHECK-LE-NEXT: bfi r3, r1, #0, #1 -; CHECK-LE-NEXT: cset r1, lt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: csetm r1, ne +; CHECK-LE-NEXT: csetm r1, lt ; CHECK-LE-NEXT: bfi r3, r1, #1, #1 ; CHECK-LE-NEXT: lsls r1, r3, #31 ; CHECK-LE-NEXT: itt ne @@ -538,15 +486,11 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-LE-NEXT: rsbs r3, r2, #0 ; CHECK-LE-NEXT: vmov r3, s6 ; CHECK-LE-NEXT: sbcs.w r2, r1, r2, asr #31 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs r4, r3, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, r3, asr #31 ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: itt ne @@ -569,51 +513,43 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, lr, asr #31 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: vmov q0[3], q0[1], r12, lr -; CHECK-BE-NEXT: cmp r3, #0 -; 
CHECK-BE-NEXT: @ implicit-def: $q1 -; CHECK-BE-NEXT: csetm lr, ne +; CHECK-BE-NEXT: vmov q1[3], q1[1], r12, lr +; CHECK-BE-NEXT: csetm lr, lt ; CHECK-BE-NEXT: rsbs.w r3, r12, #0 +; CHECK-BE-NEXT: @ implicit-def: $q0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12, asr #31 ; CHECK-BE-NEXT: bfi r1, lr, #0, #1 -; CHECK-BE-NEXT: cset r3, lt -; CHECK-BE-NEXT: cmp r3, #0 -; CHECK-BE-NEXT: csetm r3, ne +; CHECK-BE-NEXT: csetm r3, lt ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 ; CHECK-BE-NEXT: lsls r3, r1, #30 ; CHECK-BE-NEXT: bpl .LBB8_2 ; CHECK-BE-NEXT: @ %bb.1: @ %cond.load ; CHECK-BE-NEXT: ldr r3, [r2] ; CHECK-BE-NEXT: vmov.32 q2[1], r3 -; CHECK-BE-NEXT: vrev64.32 q1, q2 +; CHECK-BE-NEXT: vrev64.32 q0, q2 ; CHECK-BE-NEXT: .LBB8_2: @ %else -; CHECK-BE-NEXT: vrev64.32 q2, q0 +; CHECK-BE-NEXT: vrev64.32 q2, q1 ; CHECK-BE-NEXT: lsls r1, r1, #31 ; CHECK-BE-NEXT: beq .LBB8_4 ; CHECK-BE-NEXT: @ %bb.3: @ %cond.load1 ; CHECK-BE-NEXT: ldr r1, [r2, #4] -; CHECK-BE-NEXT: vrev64.32 q0, q1 -; CHECK-BE-NEXT: vmov.32 q0[3], r1 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: vmov.32 q1[3], r1 +; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: .LBB8_4: @ %else2 ; CHECK-BE-NEXT: vrev64.32 q3, q2 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vmov r2, s15 -; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff -; CHECK-BE-NEXT: vand q0, q1, q0 +; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff +; CHECK-BE-NEXT: vand q0, q0, q1 ; CHECK-BE-NEXT: rsbs r3, r2, #0 ; CHECK-BE-NEXT: vmov r3, s13 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r12, ne +; CHECK-BE-NEXT: csetm r12, lt ; CHECK-BE-NEXT: rsbs r2, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r3, asr #31 ; CHECK-BE-NEXT: bfi r1, r12, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: itt mi diff --git 
a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll index 2adaf7cf577a76..afcea7901ccf7a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll @@ -1759,15 +1759,11 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 ; CHECK-LE-NEXT: vmov r12, lr, d1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, lr ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: beq .LBB49_2 @@ -1801,15 +1797,11 @@ define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 ; CHECK-BE-NEXT: vmov r12, lr, d2 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: bpl .LBB49_2 @@ -1848,15 +1840,11 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des ; CHECK-LE-NEXT: vmov r12, lr, d3 ; CHECK-LE-NEXT: rsbs r2, r2, #0 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, lr ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: 
cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: beq .LBB50_2 @@ -1890,15 +1878,11 @@ define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%des ; CHECK-BE-NEXT: vmov r12, lr, d0 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: bpl .LBB50_2 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll index 6c598cf71b2e1e..29b29859e86290 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll @@ -944,15 +944,11 @@ define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) { ; CHECK-LE-NEXT: vmov r12, lr, d1 ; CHECK-LE-NEXT: rsbs r2, r2, #0 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, lr ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: it ne @@ -975,15 +971,11 @@ define arm_aapcs_vfpcc void @masked_v2i64(<2 x i64> *%dest, <2 x i64> %a) { ; CHECK-BE-NEXT: vmov r12, lr, d2 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: 
cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: it mi @@ -1011,15 +1003,11 @@ define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a, ; CHECK-LE-NEXT: vmov r12, lr, d3 ; CHECK-LE-NEXT: rsbs r2, r2, #0 ; CHECK-LE-NEXT: sbcs.w r2, r1, r3 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: rsbs.w r3, r12, #0 ; CHECK-LE-NEXT: sbcs.w r3, r1, lr ; CHECK-LE-NEXT: bfi r1, r2, #0, #1 -; CHECK-LE-NEXT: cset r2, lt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, lt ; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: it ne @@ -1042,15 +1030,11 @@ define arm_aapcs_vfpcc void @masked_v2f64(<2 x double> *%dest, <2 x double> %a, ; CHECK-BE-NEXT: vmov r12, lr, d4 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: rsbs.w r3, lr, #0 ; CHECK-BE-NEXT: sbcs.w r3, r1, r12 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: cset r2, lt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, lt ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: lsls r2, r1, #30 ; CHECK-BE-NEXT: it mi @@ -1216,33 +1200,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; 
CHECK-LE-NEXT: vcmp.f32 s1, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 -; CHECK-LE-NEXT: cset r1, gt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #0, #1 ; CHECK-LE-NEXT: vcmp.f32 s2, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #0, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: vcmp.f32 s3, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #1, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: bfi r1, r2, #2, #1 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB25_5 @@ -1282,6 +1258,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: .pad #4 ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr @@ -1289,27 +1266,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align4(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 -; CHECK-BE-NEXT: cset r1, gt -; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #0, #1 ; CHECK-BE-NEXT: 
vcmp.f32 s5, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #0, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #1, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: bfi r1, r2, #2, #1 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 ; CHECK-BE-NEXT: lsls r2, r1, #28 ; CHECK-BE-NEXT: bmi .LBB25_5 @@ -1356,33 +1324,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: .pad #4 ; CHECK-LE-NEXT: sub sp, #4 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: vcmp.f32 s1, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 -; CHECK-LE-NEXT: cset r1, gt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #0, #1 ; CHECK-LE-NEXT: vcmp.f32 s2, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #0, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: vcmp.f32 s3, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #1, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; 
CHECK-LE-NEXT: bfi r1, r2, #2, #1 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB26_5 @@ -1422,6 +1382,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: .pad #4 ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr @@ -1429,27 +1390,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align2(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 -; CHECK-BE-NEXT: cset r1, gt -; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #0, #1 ; CHECK-BE-NEXT: vcmp.f32 s5, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #0, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #1, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: bfi r1, r2, #2, #1 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 ; CHECK-BE-NEXT: lsls r2, r1, #28 ; CHECK-BE-NEXT: bmi .LBB26_5 @@ -1496,33 +1448,25 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-LE-NEXT: .pad #20 ; CHECK-LE-NEXT: sub sp, #20 ; CHECK-LE-NEXT: vcmp.f32 s0, #0 -; CHECK-LE-NEXT: 
vcvtb.f16.f32 s4, s0 +; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: vcmp.f32 s1, #0 -; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 +; CHECK-LE-NEXT: vcvtb.f16.f32 s4, s0 ; CHECK-LE-NEXT: vcvtb.f16.f32 s6, s2 +; CHECK-LE-NEXT: vcvtt.f16.f32 s4, s1 ; CHECK-LE-NEXT: vcvtt.f16.f32 s5, s3 -; CHECK-LE-NEXT: cset r1, gt -; CHECK-LE-NEXT: cmp r1, #0 -; CHECK-LE-NEXT: mov.w r1, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #0, #1 ; CHECK-LE-NEXT: vcmp.f32 s2, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #0, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-LE-NEXT: bfi r1, r2, #1, #1 ; CHECK-LE-NEXT: vcmp.f32 s3, #0 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: bfi r1, r2, #1, #1 +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-LE-NEXT: bfi r1, r2, #2, #1 -; CHECK-LE-NEXT: cset r2, gt -; CHECK-LE-NEXT: cmp r2, #0 -; CHECK-LE-NEXT: csetm r2, ne +; CHECK-LE-NEXT: csetm r2, gt ; CHECK-LE-NEXT: bfi r1, r2, #3, #1 ; CHECK-LE-NEXT: lsls r2, r1, #31 ; CHECK-LE-NEXT: bne .LBB27_5 @@ -1570,6 +1514,7 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: .pad #20 ; CHECK-BE-NEXT: sub sp, #20 ; CHECK-BE-NEXT: vrev64.32 q1, q0 +; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr @@ -1577,27 +1522,18 @@ define arm_aapcs_vfpcc void @masked_v4f16_align1(<4 x half> *%dest, <4 x float> ; CHECK-BE-NEXT: vcvtt.f16.f32 s0, s5 ; CHECK-BE-NEXT: vcvtb.f16.f32 s2, s6 ; CHECK-BE-NEXT: vcvtt.f16.f32 s1, s7 -; CHECK-BE-NEXT: cset r1, gt -; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: mov.w r1, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt 
; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #0, #1 ; CHECK-BE-NEXT: vcmp.f32 s5, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #0, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-BE-NEXT: bfi r1, r2, #1, #1 ; CHECK-BE-NEXT: vcmp.f32 s4, #0 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: bfi r1, r2, #1, #1 +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: bfi r1, r2, #2, #1 -; CHECK-BE-NEXT: cset r2, gt -; CHECK-BE-NEXT: cmp r2, #0 -; CHECK-BE-NEXT: csetm r2, ne +; CHECK-BE-NEXT: csetm r2, gt ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 ; CHECK-BE-NEXT: lsls r2, r1, #28 ; CHECK-BE-NEXT: bmi .LBB27_5 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll index 92355a8256eb45..d536e6b72ac9c3 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -47,15 +47,11 @@ define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -111,15 +107,11 @@ define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: subs.w 
r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -176,15 +168,11 @@ define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -240,15 +228,11 @@ define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: subs.w r0, r4, r12 ; CHECK-NEXT: sbcs.w r0, r5, lr -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -329,18 +313,12 @@ define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x do ; CHECK-NEXT: vmov r12, r1, d9 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: vmov r2, r3, d11 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: bfi r4, r0, #0, #8 ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: csetm r0, ne ; 
CHECK-NEXT: bfi r4, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r4 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll index 2b4f3d66fe64f2..892be9a4330733 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll @@ -153,21 +153,17 @@ define arm_aapcs_vfpcc <2 x i32> @smax2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: subs r1, r1, r3 ; CHECK-NEXT: sbcs.w r1, lr, r3, asr #31 ; CHECK-NEXT: asr.w r5, r3, #31 -; CHECK-NEXT: cset r1, lt ; CHECK-NEXT: asr.w r12, r0, #31 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31 ; CHECK-NEXT: bfi r3, r1, #0, #8 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: asrs r4, r2, #31 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmov q1[3], q1[1], lr, r12 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: vmov q0[3], q0[1], r5, r4 -; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: pop {r4, r5, r7, pc} @@ -233,17 +229,13 @@ define arm_aapcs_vfpcc <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d3 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -265,17 +257,13 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: 
vmov lr, r12, d3 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d7 ; CHECK-NEXT: subs.w r2, r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r3, r2, #8, #8 ; CHECK-NEXT: vmov r2, r12, d0 ; CHECK-NEXT: vmsr p0, r3 @@ -285,16 +273,12 @@ define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d5 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d1 ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q2 @@ -453,17 +437,13 @@ define arm_aapcs_vfpcc <2 x i32> @umax2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d3 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -530,17 +510,13 @@ define arm_aapcs_vfpcc <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo 
+; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d3 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d1 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -562,17 +538,13 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d3 -; CHECK-NEXT: cset r1, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d7 ; CHECK-NEXT: subs.w r2, r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r3, r2, #8, #8 ; CHECK-NEXT: vmov r2, r12, d0 ; CHECK-NEXT: vmsr p0, r3 @@ -582,16 +554,12 @@ define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d5 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d1 ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q2 @@ -747,29 +715,25 @@ define arm_aapcs_vfpcc <2 x i32> @smin2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vmov r0, s6 ; CHECK-NEXT: vmov r1, s4 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 -; CHECK-NEXT: vmov lr, s2 ; CHECK-NEXT: asrs r2, r0, #31 
; CHECK-NEXT: asrs r3, r1, #31 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2 ; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: vmov q0[2], q0[0], r3, lr -; CHECK-NEXT: asr.w r12, lr, #31 -; CHECK-NEXT: asrs r2, r3, #31 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 +; CHECK-NEXT: asr.w lr, r3, #31 ; CHECK-NEXT: subs r3, r3, r1 -; CHECK-NEXT: sbcs.w r1, r2, r1, asr #31 -; CHECK-NEXT: vmov q0[3], q0[1], r2, r12 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: bfi r2, r1, #0, #8 -; CHECK-NEXT: subs.w r1, lr, r0 +; CHECK-NEXT: sbcs.w r1, lr, r1, asr #31 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: asr.w r12, r2, #31 +; CHECK-NEXT: bfi r3, r1, #0, #8 +; CHECK-NEXT: subs r1, r2, r0 ; CHECK-NEXT: sbcs.w r0, r12, r0, asr #31 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi r2, r0, #8, #8 -; CHECK-NEXT: vmsr p0, r2 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r12 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r3, r0, #8, #8 +; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: pop {r7, pc} %c = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) @@ -834,17 +798,13 @@ define arm_aapcs_vfpcc <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -866,17 +826,13 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: 
mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d7 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d3 ; CHECK-NEXT: subs.w r2, r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r3, r2, #8, #8 ; CHECK-NEXT: vmov r2, r12, d4 ; CHECK-NEXT: vmsr p0, r3 @@ -886,16 +842,12 @@ define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d1 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d5 ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: bfi r1, r2, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q2 @@ -1054,17 +1006,13 @@ define arm_aapcs_vfpcc <2 x i32> @umin2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1131,17 +1079,13 @@ define arm_aapcs_vfpcc <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w 
r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: vmov r3, r2, d1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -1163,17 +1107,13 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs.w r1, r2, r12 ; CHECK-NEXT: vmov lr, r12, d7 -; CHECK-NEXT: cset r1, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 ; CHECK-NEXT: vmov r2, r4, d3 ; CHECK-NEXT: subs.w r2, r2, lr ; CHECK-NEXT: sbcs.w r2, r4, r12 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r3, r2, #8, #8 ; CHECK-NEXT: vmov r2, r12, d4 ; CHECK-NEXT: vmsr p0, r3 @@ -1183,16 +1123,12 @@ define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 ; CHECK-NEXT: vmov r4, r3, d1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #0, #8 ; CHECK-NEXT: vmov r2, r12, d5 ; CHECK-NEXT: subs r2, r4, r2 ; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: bfi r1, r2, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q2 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll index e919891c446e50..ea7a26ee3a9ee4 100644 --- 
a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll @@ -79,14 +79,10 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) { ; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: sbcs.w r1, r12, r3 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 ; CHECK-NEXT: bx lr @@ -105,24 +101,22 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2f64(<2 x double> %src) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI6_0 -; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: vmov r4, r5, d0 ; CHECK-NEXT: mov r2, r4 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpeq ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: mov r2, r4 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpeq -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: clz r1, r6 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: lsrs r1, r1, #5 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csetm r1, eq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: csetm r0, eq +; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 +; CHECK-NEXT: vmov q0[3], q0[1], r0, r1 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 3 @@ -218,14 +212,10 @@ define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) { ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r12, r1 ; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: sbcs.w r1, r12, r3 
; CHECK-NEXT: vmov s2, r0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r0, ne +; CHECK-NEXT: cset r0, lt ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 2 @@ -247,27 +237,24 @@ define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2f64(<2 x double> %src) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI13_0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: vmov r4, r5, d0 ; CHECK-NEXT: mov r2, r4 ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpeq -; CHECK-NEXT: vmov r2, r1, d8 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: vldr s17, .LCPI13_1 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: cset r6, ne -; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpeq -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vmov s18, r6 -; CHECK-NEXT: vmov.f32 s19, s17 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: cset r0, ne -; CHECK-NEXT: vmov s16, r0 -; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vldr s1, .LCPI13_1 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cset r0, eq +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vmov.f32 s3, s1 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 3 @@ -472,21 +459,18 @@ define arm_aapcs_vfpcc <2 x double> @uitofp_v2i1_v2f64(<2 x i64> %src) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: rsbs r0, r2, #0 -; CHECK-NEXT: sbcs.w r0, r12, r3 +; CHECK-NEXT: sbcs.w r0, r4, r1 ; CHECK-NEXT: cset r0, lt -; 
CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: bl __aeabi_ui2d -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: cset r2, ne +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_ui2d ; CHECK-NEXT: vmov d8, r0, r1 @@ -506,21 +490,18 @@ define arm_aapcs_vfpcc <2 x double> @sitofp_v2i1_v2f64(<2 x i64> %src) { ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: rsbs r0, r0, #0 -; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: rsbs r0, r2, #0 -; CHECK-NEXT: sbcs.w r0, r12, r3 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: sbcs.w r0, r4, r1 +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bl __aeabi_i2d -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: csetm r2, lt ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_i2d ; CHECK-NEXT: vmov d8, r0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 20112715a0a451..251b187e7bcf23 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -6,8 +6,8 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB0_8 ; 
CHECK-NEXT: @ %bb.1: @ %entry @@ -16,64 +16,57 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: mov r11, r2 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: mov r10, r2 ; CHECK-NEXT: b .LBB0_6 ; CHECK-NEXT: .LBB0_3: @ %vector.ph -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: bic r3, r3, #1 -; CHECK-NEXT: subs r7, r3, #2 +; CHECK-NEXT: bic r5, r3, #1 ; CHECK-NEXT: adr r4, .LCPI0_0 +; CHECK-NEXT: subs r7, r5, #2 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: add.w r3, r1, r5, lsl #2 ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI0_1 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: add.w r11, r2, r3, lsl #2 -; CHECK-NEXT: add.w r10, r1, r3, lsl #2 -; CHECK-NEXT: add.w r12, r0, r3, lsl #2 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r5, [sp] @ 4-byte Spill +; CHECK-NEXT: add.w r10, r2, r5, lsl #2 +; CHECK-NEXT: add.w r12, r0, r5, lsl #2 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrd r4, r5, [r0], #8 -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: ldrd r4, r6, [r0], #8 +; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: ldrd r7, r8, [r1], #8 -; CHECK-NEXT: smull r8, r5, r8, r5 -; CHECK-NEXT: smull r4, r7, r7, r4 -; CHECK-NEXT: asrl r8, r5, #31 -; CHECK-NEXT: asrl r4, r7, #31 +; CHECK-NEXT: smull r4, r11, r7, r4 +; CHECK-NEXT: asrl r4, r11, #31 ; CHECK-NEXT: rsbs.w r9, r4, #-2147483648 -; CHECK-NEXT: vmov q2[2], q2[0], r4, r8 ; CHECK-NEXT: mov.w r9, #-1 -; CHECK-NEXT: sbcs.w r3, r9, r7 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: vmov q2[3], q2[1], r7, r5 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r6, r3, #0, #8 -; CHECK-NEXT: rsbs.w r3, r8, #-2147483648 -; CHECK-NEXT: sbcs.w r3, r9, r5 -; 
CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r6, r3, #8, #8 -; CHECK-NEXT: vmsr p0, r6 +; CHECK-NEXT: sbcs.w r3, r9, r11 +; CHECK-NEXT: csetm r3, lt +; CHECK-NEXT: bfi r5, r3, #0, #8 +; CHECK-NEXT: smull r6, r3, r8, r6 +; CHECK-NEXT: asrl r6, r3, #31 +; CHECK-NEXT: rsbs.w r7, r6, #-2147483648 +; CHECK-NEXT: vmov q2[2], q2[0], r4, r6 +; CHECK-NEXT: sbcs.w r7, r9, r3 +; CHECK-NEXT: vmov q2[3], q2[1], r11, r3 +; CHECK-NEXT: csetm r7, lt ; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: bfi r5, r7, #8, #8 +; CHECK-NEXT: vmsr p0, r5 ; CHECK-NEXT: vpsel q2, q2, q0 ; CHECK-NEXT: vmov r3, r4, d4 ; CHECK-NEXT: subs r3, r3, r6 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 ; CHECK-NEXT: vmov r3, r5, d5 ; CHECK-NEXT: subs r3, r3, r6 ; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vpsel q2, q2, q1 @@ -83,6 +76,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: le lr, .LBB0_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block ; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: cmp r7, r3 ; CHECK-NEXT: beq .LBB0_8 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader @@ -93,7 +87,7 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r3, [r12], #4 -; CHECK-NEXT: ldr r4, [r10], #4 +; CHECK-NEXT: ldr r4, [r6], #4 ; CHECK-NEXT: smull r4, r3, r4, r3 ; CHECK-NEXT: asrl r4, r3, #31 ; CHECK-NEXT: subs r5, r1, r4 @@ -105,10 +99,10 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly 
%pSrcA, i32* ; CHECK-NEXT: subs r5, r4, r2 ; CHECK-NEXT: sbcs r3, r3, #0 ; CHECK-NEXT: csel r3, r4, r2, lt -; CHECK-NEXT: str r3, [r11], #4 +; CHECK-NEXT: str r3, [r10], #4 ; CHECK-NEXT: le lr, .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: add sp, #12 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.9: @@ -212,122 +206,108 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB1_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB1_3 ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r9, r5 ; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: b .LBB1_6 ; CHECK-NEXT: .LBB1_3: @ %vector.ph -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: subs r7, r3, #4 +; CHECK-NEXT: bic r1, r3, #3 ; CHECK-NEXT: adr r4, .LCPI1_0 +; CHECK-NEXT: subs r7, r1, #4 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: vldrw.u32 q0, [r4] -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: add.w r7, r1, r3, lsl #2 -; CHECK-NEXT: strd r7, r3, [sp, #4] @ 8-byte Folded Spill ; CHECK-NEXT: adr r4, .LCPI1_1 -; CHECK-NEXT: add.w r11, r2, r3, lsl #2 -; CHECK-NEXT: add.w r12, r0, r3, lsl #2 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w lr, r6, r7, lsr #2 +; CHECK-NEXT: str r1, [sp] @ 4-byte Spill +; CHECK-NEXT: add.w r11, r2, r1, lsl #2 +; CHECK-NEXT: add.w r9, r5, r1, lsl #2 +; CHECK-NEXT: add.w r12, r0, r1, lsl #2 ; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: mov.w r9, #-1 ; CHECK-NEXT: .LBB1_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q4, [r5], #16 ; CHECK-NEXT: vldrw.u32 q3, [r0], #16 -; CHECK-NEXT: vldrw.u32 q4, [r1], #16 -; CHECK-NEXT: mov.w r3, #-1 -; CHECK-NEXT: 
mov.w r8, #0 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov.w r2, #-1 ; CHECK-NEXT: vmov.f32 s8, s14 -; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: vmov.f32 s20, s18 +; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: vmov.f32 s10, s15 ; CHECK-NEXT: vmov.f32 s22, s19 ; CHECK-NEXT: vmullb.s32 q6, q5, q2 -; CHECK-NEXT: vmov.f32 s14, s13 +; CHECK-NEXT: vmov.f32 s18, s17 ; CHECK-NEXT: vmov r4, r7, d12 ; CHECK-NEXT: asrl r4, r7, #31 -; CHECK-NEXT: vmov.f32 s18, s17 +; CHECK-NEXT: vmov.f32 s14, s13 ; CHECK-NEXT: rsbs.w r5, r4, #-2147483648 -; CHECK-NEXT: sbcs.w r5, r3, r7 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne +; CHECK-NEXT: sbcs.w r5, r2, r7 +; CHECK-NEXT: csetm r5, lt ; CHECK-NEXT: bfi r8, r5, #0, #8 ; CHECK-NEXT: vmov r10, r5, d13 ; CHECK-NEXT: asrl r10, r5, #31 +; CHECK-NEXT: vmov r6, s18 ; CHECK-NEXT: rsbs.w r3, r10, #-2147483648 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r10 -; CHECK-NEXT: sbcs.w r3, r6, r5 +; CHECK-NEXT: sbcs.w r3, r2, r5 ; CHECK-NEXT: vmov q2[3], q2[1], r7, r5 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: mvn r10, #-2147483648 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mov.w r6, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r8, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r8 +; CHECK-NEXT: mvn r8, #-2147483648 ; CHECK-NEXT: vpsel q2, q2, q0 ; CHECK-NEXT: vmov r3, r4, d4 -; CHECK-NEXT: subs.w r3, r3, r10 +; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 ; CHECK-NEXT: vmov r3, r5, d5 -; CHECK-NEXT: subs.w r3, r3, r10 +; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: vmov r5, s18 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, 
#8, #8 ; CHECK-NEXT: vmov r3, s12 ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vmov r4, s16 ; CHECK-NEXT: vpsel q2, q2, q1 -; CHECK-NEXT: smull r8, r7, r4, r3 -; CHECK-NEXT: asrl r8, r7, #31 -; CHECK-NEXT: rsbs.w r3, r8, #-2147483648 -; CHECK-NEXT: sbcs.w r3, r9, r7 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r6, r3, #0, #8 -; CHECK-NEXT: vmov r3, s14 -; CHECK-NEXT: smull r4, r5, r5, r3 -; CHECK-NEXT: asrl r4, r5, #31 +; CHECK-NEXT: smull r4, r7, r4, r3 +; CHECK-NEXT: asrl r4, r7, #31 ; CHECK-NEXT: rsbs.w r3, r4, #-2147483648 -; CHECK-NEXT: vmov q3[2], q3[0], r8, r4 -; CHECK-NEXT: sbcs.w r3, r9, r5 -; CHECK-NEXT: vmov q3[3], q3[1], r7, r5 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r6, r3, #8, #8 -; CHECK-NEXT: vmsr p0, r6 +; CHECK-NEXT: sbcs.w r3, r2, r7 +; CHECK-NEXT: csetm r3, lt +; CHECK-NEXT: bfi r5, r3, #0, #8 +; CHECK-NEXT: vmov r3, s14 +; CHECK-NEXT: smull r6, r3, r6, r3 +; CHECK-NEXT: asrl r6, r3, #31 +; CHECK-NEXT: rsbs.w r1, r6, #-2147483648 +; CHECK-NEXT: vmov q3[2], q3[0], r4, r6 +; CHECK-NEXT: sbcs.w r1, r2, r3 +; CHECK-NEXT: vmov q3[3], q3[1], r7, r3 +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: bfi r5, r1, #8, #8 +; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: ldrd r5, r2, [sp, #8] @ 8-byte Folded Reload ; CHECK-NEXT: vpsel q3, q3, q0 -; CHECK-NEXT: vmov r3, r4, d6 -; CHECK-NEXT: subs.w r3, r3, r10 -; CHECK-NEXT: sbcs r3, r4, #0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, r5, d7 -; CHECK-NEXT: subs.w r3, r3, r10 -; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r4, r3, #8, #8 -; CHECK-NEXT: vmsr p0, r4 +; CHECK-NEXT: vmov r1, r3, d6 +; CHECK-NEXT: subs.w r1, r1, r8 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: csetm r1, lt +; 
CHECK-NEXT: bfi r3, r1, #0, #8 +; CHECK-NEXT: vmov r1, r4, d7 +; CHECK-NEXT: subs.w r1, r1, r8 +; CHECK-NEXT: sbcs r1, r4, #0 +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: bfi r3, r1, #8, #8 +; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q3, q3, q1 ; CHECK-NEXT: vmov.f32 s13, s14 ; CHECK-NEXT: vmov.f32 s14, s8 @@ -335,31 +315,30 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vstrb.8 q3, [r2], #16 ; CHECK-NEXT: le lr, .LBB1_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block -; CHECK-NEXT: ldrd r7, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r7, r3 +; CHECK-NEXT: ldrd r1, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: cmp r1, r3 ; CHECK-NEXT: beq .LBB1_8 ; CHECK-NEXT: .LBB1_6: @ %for.body.preheader21 -; CHECK-NEXT: sub.w lr, r3, r7 -; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: sub.w lr, r3, r1 +; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: mov.w r3, #-2147483648 ; CHECK-NEXT: mvn r2, #-2147483648 ; CHECK-NEXT: .LBB1_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r4, [r12], #4 -; CHECK-NEXT: ldr r5, [r0], #4 -; CHECK-NEXT: smull r4, r5, r5, r4 -; CHECK-NEXT: asrl r4, r5, #31 -; CHECK-NEXT: subs r6, r3, r4 -; CHECK-NEXT: sbcs.w r6, r1, r5 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: ldr r1, [r12], #4 +; CHECK-NEXT: ldr r4, [r9], #4 +; CHECK-NEXT: smull r4, r1, r4, r1 +; CHECK-NEXT: asrl r4, r1, #31 +; CHECK-NEXT: subs r5, r3, r4 +; CHECK-NEXT: sbcs.w r5, r0, r1 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: csel r4, r4, r3, ne -; CHECK-NEXT: csel r5, r5, r1, ne -; CHECK-NEXT: subs r6, r4, r2 -; CHECK-NEXT: sbcs r5, r5, #0 -; CHECK-NEXT: csel r4, r4, r2, lt -; CHECK-NEXT: str r4, [r11], #4 +; CHECK-NEXT: csel r1, r1, r0, ne +; CHECK-NEXT: subs r5, r4, r2 +; CHECK-NEXT: sbcs r1, r1, #0 +; CHECK-NEXT: csel r1, r4, r2, lt +; CHECK-NEXT: str r1, [r11], #4 ; CHECK-NEXT: le lr, .LBB1_7 ; CHECK-NEXT: .LBB1_8: 
@ %for.cond.cleanup ; CHECK-NEXT: add sp, #16 @@ -468,21 +447,21 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB2_3 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: adds r7, r3, #3 -; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: bic r7, r7, #3 +; CHECK-NEXT: adds r6, r3, #3 +; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: bic r6, r6, #3 ; CHECK-NEXT: adr r4, .LCPI2_1 -; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: adr r5, .LCPI2_2 +; CHECK-NEXT: subs r6, #4 ; CHECK-NEXT: vldrw.u32 q2, [r4] -; CHECK-NEXT: vldrw.u32 q3, [r5] -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: adr r6, .LCPI2_0 -; CHECK-NEXT: subs r7, r3, #1 -; CHECK-NEXT: vldrw.u32 q0, [r6] ; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: vdup.32 q1, r7 ; CHECK-NEXT: mov.w r12, #-1 +; CHECK-NEXT: add.w lr, r5, r6, lsr #2 +; CHECK-NEXT: adr r5, .LCPI2_0 +; CHECK-NEXT: vldrw.u32 q0, [r5] +; CHECK-NEXT: adr r5, .LCPI2_2 +; CHECK-NEXT: subs r6, r3, #1 +; CHECK-NEXT: vldrw.u32 q3, [r5] +; CHECK-NEXT: vdup.32 q1, r6 ; CHECK-NEXT: mvn r8, #-2147483648 ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: .LBB2_2: @ %vector.body @@ -502,14 +481,12 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov.f32 s28, s26 ; CHECK-NEXT: vmov.f32 s30, s27 ; CHECK-NEXT: vmullb.s32 q0, q7, q4 -; CHECK-NEXT: vmov.f32 s22, s21 +; CHECK-NEXT: vmov.f32 s22, s25 ; CHECK-NEXT: vmov r10, r5, d0 ; CHECK-NEXT: asrl r10, r5, #31 ; CHECK-NEXT: rsbs.w r7, r10, #-2147483648 ; CHECK-NEXT: sbcs.w r7, r12, r5 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csetm r7, ne +; CHECK-NEXT: csetm r7, lt ; CHECK-NEXT: bfi r4, r7, #0, #8 ; CHECK-NEXT: vmov r6, r7, d1 ; CHECK-NEXT: asrl r6, r7, #31 @@ -517,72 +494,58 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov q0[2], q0[0], r10, r6 ; CHECK-NEXT: sbcs.w r3, r12, r7 ; CHECK-NEXT: vmov q0[3], q0[1], r5, 
r7 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt +; CHECK-NEXT: vmov r7, s22 ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r4 -; CHECK-NEXT: vpsel q4, q0, q2 -; CHECK-NEXT: vmov.f32 s2, s25 -; CHECK-NEXT: vmov r3, r4, d8 -; CHECK-NEXT: vmov r7, s2 +; CHECK-NEXT: vpsel q0, q0, q2 +; CHECK-NEXT: vmov r3, r4, d0 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, r5, d9 +; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmov r3, s20 ; CHECK-NEXT: vmsr p0, r4 ; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vpsel q4, q4, q3 +; CHECK-NEXT: vpsel q4, q0, q3 +; CHECK-NEXT: vmov.f32 s2, s21 ; CHECK-NEXT: smull r10, r5, r4, r3 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: asrl r10, r5, #31 ; CHECK-NEXT: rsbs.w r3, r10, #-2147483648 ; CHECK-NEXT: sbcs.w r3, r12, r5 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, s22 -; CHECK-NEXT: smull r6, r7, r7, r3 -; CHECK-NEXT: asrl r6, r7, #31 -; CHECK-NEXT: rsbs.w r3, r6, #-2147483648 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: smull r6, r3, r7, r3 +; CHECK-NEXT: asrl r6, r3, #31 +; CHECK-NEXT: rsbs.w r7, r6, #-2147483648 ; CHECK-NEXT: vmov q0[2], q0[0], r10, r6 -; CHECK-NEXT: sbcs.w r3, r12, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r5, r7 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r4, r3, #8, #8 +; CHECK-NEXT: sbcs.w r7, r12, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r5, r3 +; CHECK-NEXT: csetm r7, lt +; CHECK-NEXT: bfi 
r4, r7, #8, #8 ; CHECK-NEXT: vmsr p0, r4 -; CHECK-NEXT: vpsel q5, q0, q2 -; CHECK-NEXT: vmov r3, r4, d10 +; CHECK-NEXT: vpsel q0, q0, q2 +; CHECK-NEXT: vmov r3, r4, d0 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r4, #0 ; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, r5, d11 +; CHECK-NEXT: vmov r3, r5, d1 ; CHECK-NEXT: subs.w r3, r3, r8 ; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: cset r3, lt -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r4, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r4 -; CHECK-NEXT: vpsel q0, q5, q3 +; CHECK-NEXT: vpsel q0, q0, q3 ; CHECK-NEXT: vldr p0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: vmov.f32 s1, s2 ; CHECK-NEXT: vmov.f32 s2, s16 @@ -693,9 +656,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: lsrl r4, r9, #31 ; CHECK-NEXT: subs.w r5, r4, #-1 ; CHECK-NEXT: sbcs r5, r9, #0 -; CHECK-NEXT: cset r5, lo -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne +; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r8, r5, #0, #8 ; CHECK-NEXT: umull r6, r5, r3, r6 ; CHECK-NEXT: lsrl r6, r5, #31 @@ -703,9 +664,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov q1[2], q1[0], r4, r6 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r9, r5 -; CHECK-NEXT: cset r3, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lo ; CHECK-NEXT: bfi r8, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r8 ; CHECK-NEXT: vpsel q1, q1, q0 @@ -858,9 +817,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: subs.w r5, r4, #-1 ; CHECK-NEXT: vmullb.u32 q4, q3, q1 ; CHECK-NEXT: sbcs r5, r9, #0 -; CHECK-NEXT: cset r5, lo -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne +; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: 
bfi r6, r5, #0, #8 ; CHECK-NEXT: vmov r8, r5, d11 ; CHECK-NEXT: lsrl r8, r5, #31 @@ -868,9 +825,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov q2[2], q2[0], r4, r8 ; CHECK-NEXT: sbcs r7, r5, #0 ; CHECK-NEXT: vmov q2[3], q2[1], r9, r5 -; CHECK-NEXT: cset r7, lo -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csetm r7, ne +; CHECK-NEXT: csetm r7, lo ; CHECK-NEXT: bfi r6, r7, #8, #8 ; CHECK-NEXT: vmov r4, r7, d8 ; CHECK-NEXT: lsrl r4, r7, #31 @@ -879,9 +834,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: sbcs r5, r7, #0 ; CHECK-NEXT: vpsel q2, q2, q0 -; CHECK-NEXT: cset r5, lo -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne +; CHECK-NEXT: csetm r5, lo ; CHECK-NEXT: bfi r6, r5, #0, #8 ; CHECK-NEXT: vmov r2, r5, d9 ; CHECK-NEXT: lsrl r2, r5, #31 @@ -889,9 +842,7 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vmov q1[2], q1[0], r4, r2 ; CHECK-NEXT: sbcs r3, r5, #0 ; CHECK-NEXT: vmov q1[3], q1[1], r7, r5 -; CHECK-NEXT: cset r3, lo -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne +; CHECK-NEXT: csetm r3, lo ; CHECK-NEXT: bfi r6, r3, #8, #8 ; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload diff --git a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll index 10ecdc01918dd4..bbc0ff9bd1be58 100644 --- a/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-saturating-arith.ll @@ -36,21 +36,19 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r0, r2, d2 -; CHECK-NEXT: vmov r3, r1, d0 -; CHECK-NEXT: adds.w r12, r3, r0 +; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: adds.w r12, r2, r0 ; CHECK-NEXT: vmov r0, r4, 
d1 -; CHECK-NEXT: adc.w lr, r1, r2 -; CHECK-NEXT: subs.w r3, r12, r3 -; CHECK-NEXT: sbcs.w r1, lr, r1 -; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: adc.w lr, r3, r1 +; CHECK-NEXT: subs.w r2, r12, r2 +; CHECK-NEXT: sbcs.w r2, lr, r3 +; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r1, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: it mi -; CHECK-NEXT: eormi r1, r1, #1 +; CHECK-NEXT: eormi r2, r2, #1 +; CHECK-NEXT: rsbs r1, r2, #0 ; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: bfi r2, r1, #0, #8 ; CHECK-NEXT: vmov r1, r3, d3 ; CHECK-NEXT: adds r1, r1, r0 @@ -59,14 +57,12 @@ define arm_aapcs_vfpcc <2 x i64> @sadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) ; CHECK-NEXT: sbcs.w r0, r5, r4 ; CHECK-NEXT: vmov q0[2], q0[0], r12, r1 ; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: asr.w r1, lr, #31 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov q0[3], q0[1], lr, r5 -; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it mi ; CHECK-NEXT: eormi r0, r0, #1 +; CHECK-NEXT: asr.w r1, lr, #31 ; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r5 ; CHECK-NEXT: bfi r2, r0, #8, #8 ; CHECK-NEXT: asrs r0, r5, #31 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 @@ -122,32 +118,28 @@ entry: define arm_aapcs_vfpcc <2 x i64> @uadd_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: uadd_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vmov r0, r1, d3 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: adds.w lr, r2, r0 -; CHECK-NEXT: vmov r0, r4, d0 -; CHECK-NEXT: adc.w r12, r3, r1 -; CHECK-NEXT: subs.w r2, lr, r2 -; CHECK-NEXT: sbcs.w r2, r12, r3 -; CHECK-NEXT: vmov r3, r1, d2 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: adds r3, r3, r0 -; CHECK-NEXT: adcs r1, r4 -; CHECK-NEXT: subs r0, r3, r0 -; CHECK-NEXT: sbcs.w r0, r1, r4 -; CHECK-NEXT: vmov q1[2], 
q1[0], r3, lr -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: vmov q1[3], q1[1], r1, r12 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r2 +; CHECK-NEXT: adds r5, r2, r0 +; CHECK-NEXT: adc.w lr, r3, r1 +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs.w r2, lr, r3 +; CHECK-NEXT: vmov r3, r12, d2 +; CHECK-NEXT: vmov r1, r4, d0 +; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: adds r3, r3, r1 +; CHECK-NEXT: adc.w r0, r4, r12 +; CHECK-NEXT: subs r1, r3, r1 +; CHECK-NEXT: sbcs.w r1, r0, r4 +; CHECK-NEXT: vmov q1[2], q1[0], r3, r5 +; CHECK-NEXT: csetm r1, lo +; CHECK-NEXT: vmov q1[3], q1[1], r0, lr +; CHECK-NEXT: vmov q0[2], q0[0], r1, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 ; CHECK-NEXT: vorr q0, q1, q0 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %0 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 @@ -187,55 +179,47 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ssub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: ssub_int64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r1, r3, d2 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: rsbs r2, r1, #0 -; CHECK-NEXT: sbcs.w r2, r0, r3 -; CHECK-NEXT: vmov r2, r4, d0 -; CHECK-NEXT: cset lr, lt -; CHECK-NEXT: subs.w r12, r2, r1 -; CHECK-NEXT: sbc.w r5, r4, r3 -; CHECK-NEXT: subs.w r2, r12, r2 -; CHECK-NEXT: sbcs.w r2, r5, r4 -; CHECK-NEXT: vmov r3, r4, d3 -; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne -; CHECK-NEXT: cmp.w lr, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne r2, r2, #1 +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: vmov r2, r3, d2 +; CHECK-NEXT: vmov r1, r0, d0 +; CHECK-NEXT: vmov r4, r5, d1 +; CHECK-NEXT: subs.w r12, r1, r2 +; CHECK-NEXT: sbc.w lr, r0, r3 +; CHECK-NEXT: subs.w r1, r12, r1 +; CHECK-NEXT: sbcs.w r0, 
lr, r0 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: cset r0, lt ; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: rsbs r1, r3, #0 -; CHECK-NEXT: sbcs.w r1, r0, r4 -; CHECK-NEXT: bfi r0, r2, #0, #8 -; CHECK-NEXT: vmov r2, r1, d1 -; CHECK-NEXT: cset lr, lt -; CHECK-NEXT: subs r3, r2, r3 -; CHECK-NEXT: sbc.w r4, r1, r4 -; CHECK-NEXT: subs r2, r3, r2 -; CHECK-NEXT: sbcs.w r1, r4, r1 -; CHECK-NEXT: vmov q0[2], q0[0], r12, r3 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: vmov q0[3], q0[1], r5, r4 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r1, ne -; CHECK-NEXT: cmp.w lr, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: eorne r1, r1, #1 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: bfi r0, r1, #8, #8 -; CHECK-NEXT: asrs r1, r5, #31 -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: asrs r0, r4, #31 +; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: it lt +; CHECK-NEXT: eorlt r0, r0, #1 +; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: subs r6, r4, r2 +; CHECK-NEXT: sbc.w r7, r5, r3 +; CHECK-NEXT: subs r4, r6, r4 +; CHECK-NEXT: sbcs.w r4, r7, r5 +; CHECK-NEXT: vmov q0[2], q0[0], r12, r6 +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: sbcs.w r2, r1, r3 +; CHECK-NEXT: bfi r1, r0, #0, #8 +; CHECK-NEXT: it lt +; CHECK-NEXT: eorlt r4, r4, #1 +; CHECK-NEXT: rsbs r0, r4, #0 +; CHECK-NEXT: bfi r1, r0, #8, #8 +; CHECK-NEXT: asrs r0, r7, #31 +; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: asr.w r1, lr, #31 ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], lr, r7 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r0 ; CHECK-NEXT: adr r0, .LCPI11_0 ; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: veor q1, q1, q2 ; CHECK-NEXT: vpsel q0, q1, q0 -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI11_0: @@ -281,32 +265,28 @@ entry: define arm_aapcs_vfpcc <2 x i64> @usub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { ; CHECK-LABEL: usub_int64_t: ; CHECK: @ %bb.0: @ %entry -; 
CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vmov r0, r1, d3 ; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: subs.w lr, r2, r0 -; CHECK-NEXT: vmov r0, r4, d0 -; CHECK-NEXT: sbc.w r12, r3, r1 -; CHECK-NEXT: subs.w r2, r2, lr -; CHECK-NEXT: sbcs.w r2, r3, r12 -; CHECK-NEXT: vmov r3, r1, d2 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: subs r3, r0, r3 -; CHECK-NEXT: sbc.w r1, r4, r1 -; CHECK-NEXT: subs r0, r0, r3 -; CHECK-NEXT: sbcs.w r0, r4, r1 -; CHECK-NEXT: vmov q1[2], q1[0], r3, lr -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: vmov q1[3], q1[1], r1, r12 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r2 +; CHECK-NEXT: subs r5, r2, r0 +; CHECK-NEXT: sbc.w lr, r3, r1 +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: sbcs.w r2, r3, lr +; CHECK-NEXT: vmov r3, r12, d2 +; CHECK-NEXT: vmov r1, r4, d0 +; CHECK-NEXT: csetm r2, lo +; CHECK-NEXT: subs r3, r1, r3 +; CHECK-NEXT: sbc.w r0, r4, r12 +; CHECK-NEXT: subs r1, r1, r3 +; CHECK-NEXT: sbcs.w r1, r4, r0 +; CHECK-NEXT: vmov q1[2], q1[0], r3, r5 +; CHECK-NEXT: csetm r1, lo +; CHECK-NEXT: vmov q1[3], q1[1], r0, lr +; CHECK-NEXT: vmov q0[2], q0[0], r1, r2 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r2 ; CHECK-NEXT: vbic q0, q1, q0 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %0 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %src1, <2 x i64> %src2) ret <2 x i64> %0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll index fae8e393ea9490..f8e0a493b403ee 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll @@ -399,17 +399,13 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_slt_v2i64(<2 x i64> %src, <2 x i64> %srcb ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: sbcs.w r0, r3, r1 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: 
cset r0, lt +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov r3, r2, d1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r12, d3 ; CHECK-NEXT: subs r0, r3, r0 ; CHECK-NEXT: sbcs.w r0, r2, r12 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q2, q3 @@ -470,8 +466,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 @@ -487,8 +481,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll index d5cddc6fcfeb41..5802b0073f292f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll @@ -279,25 +279,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; 
CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1122,69 +1114,53 @@ entry: define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, <8 x half> %src2, <8 x half> %a, <8 x half> %b) { ; CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .vsave {d8, d9} -; CHECK-MVE-NEXT: vpush {d8, d9} +; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vmovx.f16 s16, s4 ; CHECK-MVE-NEXT: vmovx.f16 s18, s0 ; CHECK-MVE-NEXT: vcmp.f16 s18, s16 -; CHECK-MVE-NEXT: vmovx.f16 s16, s8 +; CHECK-MVE-NEXT: vmovx.f16 s20, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmovx.f16 s22, s12 ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmovx.f16 s18, s12 ; CHECK-MVE-NEXT: vmovx.f16 s4, s5 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s16, s18, s16 +; CHECK-MVE-NEXT: vseleq.f16 s16, s22, s20 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 ; CHECK-MVE-NEXT: vmovx.f16 s8, s1 ; CHECK-MVE-NEXT: vcmp.f16 s8, s4 -; CHECK-MVE-NEXT: vmovx.f16 s4, s9 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s13 -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: vins.f16 s0, s16 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 +; CHECK-MVE-NEXT: vmovx.f16 
s16, s13 ; CHECK-MVE-NEXT: vmovx.f16 s8, s2 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s5, s14 +; CHECK-MVE-NEXT: vseleq.f16 s4, s16, s12 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vcmp.f16 s8, s4 -; CHECK-MVE-NEXT: vmovx.f16 s4, s10 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s8, s14 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s5, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 +; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s4, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s6, s15 ; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s4 -; CHECK-MVE-NEXT: vpop {d8, d9} +; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11} ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: vcmp_une_v8f16: diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll index 37225a44b3657f..de6e85a8f5887e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -300,25 +300,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, float 
%src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1137,59 +1129,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, half %src2, < ; CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s6, s0 -; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: vmovx.f16 s5, s8 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s6, s8 +; CHECK-MVE-NEXT: vmovx.f16 s7, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s7, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s13 +; CHECK-MVE-NEXT: vmovx.f16 s8, s9 ; CHECK-MVE-NEXT: vins.f16 s0, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 +; 
CHECK-MVE-NEXT: vmovx.f16 s12, s13 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s1, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s14 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s10 +; CHECK-MVE-NEXT: vmovx.f16 s12, s14 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s15 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 +; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3 ; CHECK-MVE-NEXT: vcmp.f16 s6, s4 -; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, s4 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s10, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s6 ; CHECK-MVE-NEXT: bx lr @@ -1961,25 +1937,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 
s4, s0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -2798,59 +2766,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, half %src2, ; CHECK-MVE-LABEL: vcmp_r_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s6, s0 -; CHECK-MVE-NEXT: vmovx.f16 s5, s12 +; CHECK-MVE-NEXT: vmovx.f16 s5, s8 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s8 +; CHECK-MVE-NEXT: vmovx.f16 s7, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s5, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s7, s5 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s12, s8 -; CHECK-MVE-NEXT: vmovx.f16 s8, s13 +; CHECK-MVE-NEXT: vmovx.f16 s8, s9 ; CHECK-MVE-NEXT: vins.f16 s0, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s9 +; CHECK-MVE-NEXT: vmovx.f16 s12, s13 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s1 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 
+; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s14 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s10 +; CHECK-MVE-NEXT: vmovx.f16 s12, s14 ; CHECK-MVE-NEXT: vseleq.f16 s1, s13, s9 ; CHECK-MVE-NEXT: vins.f16 s1, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s10 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s2 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s15 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s14, s10 +; CHECK-MVE-NEXT: vmovx.f16 s10, s15 ; CHECK-MVE-NEXT: vins.f16 s2, s6 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s11 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s4, s3 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s6, s8, s6 +; CHECK-MVE-NEXT: vseleq.f16 s6, s10, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s15, s11 ; CHECK-MVE-NEXT: vins.f16 s3, s6 ; CHECK-MVE-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll index 722a5313b1d6e7..809bf664fc95fb 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll @@ -279,25 +279,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 
s0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1074,59 +1066,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_une_v8f16(<8 x half> %src, <8 x half> %a ; CHECK-MVE-LABEL: vcmp_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 -; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: vmovx.f16 s14, s4 ; CHECK-MVE-NEXT: vcmp.f16 s12, #0 -; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmovx.f16 s13, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s5 +; CHECK-MVE-NEXT: vins.f16 s0, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s9 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 ; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vins.f16 s0, s12 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; 
CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s10 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 ; CHECK-MVE-NEXT: vseleq.f16 s1, s9, s5 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s10, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmovx.f16 s6, s7 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s11, s7 ; CHECK-MVE-NEXT: vins.f16 s3, s4 ; CHECK-MVE-NEXT: bx lr @@ -1856,25 +1832,17 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_une_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_une_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 -; CHECK-MVE-NEXT: cset r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; 
CHECK-MVE-NEXT: vcmp.f32 s2, #0 -; CHECK-MVE-NEXT: cset r2, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r3, ne -; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 -; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 -; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -2651,59 +2619,43 @@ define arm_aapcs_vfpcc <8 x half> @vcmp_r_une_v8f16(<8 x half> %src, <8 x half> ; CHECK-MVE-LABEL: vcmp_r_une_v8f16: ; CHECK-MVE: @ %bb.0: @ %entry ; CHECK-MVE-NEXT: vmovx.f16 s12, s0 -; CHECK-MVE-NEXT: vmovx.f16 s14, s8 +; CHECK-MVE-NEXT: vmovx.f16 s14, s4 ; CHECK-MVE-NEXT: vcmp.f16 s12, #0 -; CHECK-MVE-NEXT: vmovx.f16 s12, s4 +; CHECK-MVE-NEXT: vmovx.f16 s13, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s14 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s1 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s5 +; CHECK-MVE-NEXT: vins.f16 s0, s12 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s9 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 +; CHECK-MVE-NEXT: vmovx.f16 s12, s9 ; CHECK-MVE-NEXT: vcmp.f16 s1, #0 -; CHECK-MVE-NEXT: vins.f16 s0, s12 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s8, s10 -; CHECK-MVE-NEXT: cset r0, 
ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 +; CHECK-MVE-NEXT: vmovx.f16 s12, s10 ; CHECK-MVE-NEXT: vseleq.f16 s1, s9, s5 ; CHECK-MVE-NEXT: vins.f16 s1, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s2 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s8 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmovx.f16 s8, s11 ; CHECK-MVE-NEXT: vseleq.f16 s2, s10, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s11 +; CHECK-MVE-NEXT: vmovx.f16 s6, s7 ; CHECK-MVE-NEXT: vins.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s4, s3 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0 -; CHECK-MVE-NEXT: vmovx.f16 s4, s7 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s3, #0 -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s4 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: cset r0, ne -; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s11, s7 ; CHECK-MVE-NEXT: vins.f16 s3, s4 ; CHECK-MVE-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll index 9b765e8ac938bb..707290f4f66cd7 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll @@ -507,8 +507,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 @@ -524,8 +522,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, < ; CHECK-NEXT: sbcs.w r2, r12, r3, asr 
#31 ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 @@ -1056,8 +1052,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 @@ -1073,8 +1067,6 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, ; CHECK-NEXT: sbcs.w r2, r12, r3, asr #31 ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: cmp r3, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll b/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll index a3b1cc0a24a884..75f7350fcd5b15 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqmovn.ll @@ -169,17 +169,13 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) { ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: subs.w r0, r0, r12 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bfi r3, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 ; CHECK-NEXT: subs.w r1, r1, r12 ; CHECK-NEXT: sbcs r1, r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r3, r1, #8, #8 ; CHECK-NEXT: adr r1, .LCPI12_0 ; CHECK-NEXT: vldrw.u32 q1, [r1] @@ -189,16 +185,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) { ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 ; CHECK-NEXT: sbcs.w r1, r3, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; 
CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 ; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 ; CHECK-NEXT: sbcs.w r1, r3, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: adr r0, .LCPI12_1 @@ -233,17 +225,13 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) { ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: bfi r3, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 ; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 ; CHECK-NEXT: sbcs.w r1, r12, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r3, r1, #8, #8 ; CHECK-NEXT: adr r1, .LCPI13_0 ; CHECK-NEXT: vldrw.u32 q1, [r1] @@ -253,16 +241,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) { ; CHECK-NEXT: vmov r1, r2, d0 ; CHECK-NEXT: subs r1, r1, r3 ; CHECK-NEXT: sbcs r1, r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #0, #8 ; CHECK-NEXT: vmov r1, r2, d1 ; CHECK-NEXT: subs r1, r1, r3 ; CHECK-NEXT: sbcs r1, r2, #0 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne +; CHECK-NEXT: csetm r1, lt ; CHECK-NEXT: bfi r0, r1, #8, #8 ; CHECK-NEXT: vmsr p0, r0 ; CHECK-NEXT: adr r0, .LCPI13_1 @@ -297,16 +281,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_umaxmin(<2 x i64> %s0) { ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; 
CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d1 ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r2, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -325,16 +305,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_uminmax(<2 x i64> %s0) { ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #0, #8 ; CHECK-NEXT: vmov r0, r2, d1 ; CHECK-NEXT: subs.w r0, r0, #-1 ; CHECK-NEXT: sbcs r0, r2, #0 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll b/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll index 1220ca2f607009..f78d36222c3121 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vqshrn.ll @@ -182,49 +182,41 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov r2, r1, d1 +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: mvn r12, #-2147483648 -; CHECK-NEXT: vmov r0, r3, d0 -; CHECK-NEXT: asrl r2, r1, #3 -; CHECK-NEXT: asrl r0, r3, #3 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 -; CHECK-NEXT: subs.w r0, r0, r12 -; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: asrl r0, r1, #3 +; CHECK-NEXT: asrl r2, r3, #3 +; CHECK-NEXT: vmov q0[2], q0[0], r2, r0 +; CHECK-NEXT: subs.w r2, r2, r12 +; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 -; CHECK-NEXT: cset r0, lt +; CHECK-NEXT: csetm lr, lt +; CHECK-NEXT: subs.w r0, r0, r12 +; CHECK-NEXT: mov.w r2, #0 +; 
CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: bfi r2, lr, #0, #8 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r2, r0, #8, #8 +; CHECK-NEXT: adr r0, .LCPI12_0 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmsr p0, r2 +; CHECK-NEXT: mov.w r2, #-1 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm lr, ne -; CHECK-NEXT: subs.w r2, r2, r12 -; CHECK-NEXT: sbcs r1, r1, #0 -; CHECK-NEXT: bfi r3, lr, #0, #8 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: bfi r3, r1, #8, #8 -; CHECK-NEXT: adr r1, .LCPI12_0 -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vmsr p0, r3 -; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: vpsel q0, q0, q1 -; CHECK-NEXT: vmov r1, r2, d0 -; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 -; CHECK-NEXT: sbcs.w r1, r3, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: bfi r0, r1, #0, #8 -; CHECK-NEXT: vmov r1, r2, d1 -; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 -; CHECK-NEXT: sbcs.w r1, r3, r2 -; CHECK-NEXT: cset r1, lt -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: bfi r0, r1, #8, #8 -; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 +; CHECK-NEXT: sbcs.w r0, r2, r1 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r3, r0, #0, #8 +; CHECK-NEXT: vmov r0, r1, d1 +; CHECK-NEXT: rsbs.w r0, r0, #-2147483648 +; CHECK-NEXT: sbcs.w r0, r2, r1 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: adr r0, .LCPI12_1 ; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: pop {r7, pc} ; CHECK-NEXT: .p2align 4 @@ -251,53 +243,45 @@ entry: define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) { ; CHECK-LABEL: vqshrni64_sminmax: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push 
{r4, r5, r6, lr} ; CHECK-NEXT: vmov r2, r1, d0 ; CHECK-NEXT: mov.w r12, #-1 ; CHECK-NEXT: asrl r2, r1, #3 -; CHECK-NEXT: mov.w lr, #0 +; CHECK-NEXT: vmov r4, r5, d1 ; CHECK-NEXT: rsbs.w r0, r2, #-2147483648 +; CHECK-NEXT: asrl r4, r5, #3 ; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: bfi r0, r3, #0, #8 -; CHECK-NEXT: vmov r4, r3, d1 -; CHECK-NEXT: asrl r4, r3, #3 -; CHECK-NEXT: rsbs.w r5, r4, #-2147483648 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: csetm lr, lt +; CHECK-NEXT: rsbs.w r0, r4, #-2147483648 +; CHECK-NEXT: sbcs.w r0, r12, r5 +; CHECK-NEXT: bfi r3, lr, #0, #8 +; CHECK-NEXT: csetm r0, lt ; CHECK-NEXT: vmov q0[2], q0[0], r2, r4 -; CHECK-NEXT: sbcs.w r5, r12, r3 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csetm r5, ne -; CHECK-NEXT: bfi r0, r5, #8, #8 -; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: adr r0, .LCPI13_0 ; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmsr p0, r3 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: mvn r2, #-2147483648 ; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: movs r6, #0 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: subs r0, r0, r2 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi lr, r0, #0, #8 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r6, r0, #0, #8 ; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: subs r0, r0, r2 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: cset r0, lt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: bfi lr, r0, #8, #8 +; CHECK-NEXT: csetm r0, lt +; CHECK-NEXT: bfi r6, r0, #8, #8 ; CHECK-NEXT: adr r0, .LCPI13_1 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vmsr p0, lr +; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: vpsel q0, q0, q1 -; CHECK-NEXT: pop {r4, r5, r7, pc} +; 
CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI13_0: @@ -331,16 +315,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_umaxmin(<2 x i64> %so) { ; CHECK-NEXT: subs.w r2, r2, #-1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -364,16 +344,12 @@ define arm_aapcs_vfpcc <2 x i64> @vqshrni64_uminmax(<2 x i64> %so) { ; CHECK-NEXT: subs.w r2, r2, #-1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r1 -; CHECK-NEXT: cset r2, lo -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne +; CHECK-NEXT: csetm r2, lo ; CHECK-NEXT: subs.w r0, r0, #-1 +; CHECK-NEXT: mov.w r3, #0 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: bfi r3, r2, #0, #8 -; CHECK-NEXT: cset r0, lo -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne +; CHECK-NEXT: csetm r0, lo ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q0, q0, q1