Skip to content

Commit 64a66a2

Browse files
committed
Fix and mitigate alias mask
1 parent fd92718 commit 64a66a2

File tree

3 files changed

+152
-101
lines changed

3 files changed

+152
-101
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3712,7 +3712,8 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
37123712
}
37133713

37143714
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3715-
const SDLoc &DL, SelectionDAG &DAG) {
3715+
const SDLoc &DL, SelectionDAG &DAG,
3716+
bool MIOrPLSupported = false) {
37163717
EVT VT = LHS.getValueType();
37173718
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
37183719

@@ -3755,6 +3756,33 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
37553756
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
37563757
// Use result of ANDS
37573758
return LHS.getValue(1);
3759+
} else if (MIOrPLSupported) {
3760+
// For MIOrPLSupported, optimize SUB/ADD operations with zero comparison
3761+
if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETLT) {
3762+
// SUB(x, y) < 0 -> SUBS(x, y)
3763+
return DAG
3764+
.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
3765+
LHS.getOperand(0), LHS.getOperand(1))
3766+
.getValue(1);
3767+
} else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETGE) {
3768+
// ADD(x, y) >= 0 -> ADDS(x, y)
3769+
return DAG
3770+
.getNode(AArch64ISD::ADDS, DL, DAG.getVTList(VT, FlagsVT),
3771+
LHS.getOperand(0), LHS.getOperand(1))
3772+
.getValue(1);
3773+
} else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETLT) {
3774+
// ADD(x, y) < 0 -> ADDS(x, y)
3775+
return DAG
3776+
.getNode(AArch64ISD::ADDS, DL, DAG.getVTList(VT, FlagsVT),
3777+
LHS.getOperand(0), LHS.getOperand(1))
3778+
.getValue(1);
3779+
} else if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETGE) {
3780+
// SUB(x, y) >= 0 -> SUBS(x, y)
3781+
return DAG
3782+
.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
3783+
LHS.getOperand(0), LHS.getOperand(1))
3784+
.getValue(1);
3785+
}
37583786
}
37593787
}
37603788

@@ -3819,7 +3847,8 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
38193847
ISD::CondCode CC, SDValue CCOp,
38203848
AArch64CC::CondCode Predicate,
38213849
AArch64CC::CondCode OutCC,
3822-
const SDLoc &DL, SelectionDAG &DAG) {
3850+
const SDLoc &DL, SelectionDAG &DAG,
3851+
bool MIOrPLSupported = false) {
38233852
unsigned Opcode = 0;
38243853
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
38253854

@@ -3846,6 +3875,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
38463875
// we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
38473876
Opcode = AArch64ISD::CCMN;
38483877
LHS = LHS.getOperand(1);
3878+
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC) &&
3879+
MIOrPLSupported) {
3880+
// For MIOrPLSupported, optimize SUB/ADD operations with zero comparison
3881+
if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETLT) {
3882+
// SUB(x, y) < 0 -> CCMP(x, y) with appropriate condition
3883+
Opcode = AArch64ISD::CCMP;
3884+
RHS = LHS.getOperand(1);
3885+
LHS = LHS.getOperand(0);
3886+
} else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETGE) {
3887+
// ADD(x, y) >= 0 -> CCMN(x, y) with appropriate condition
3888+
Opcode = AArch64ISD::CCMN;
3889+
RHS = LHS.getOperand(1);
3890+
LHS = LHS.getOperand(0);
3891+
} else if (LHS.getOpcode() == ISD::ADD && CC == ISD::SETLT) {
3892+
// ADD(x, y) < 0 -> CCMN(x, y) with appropriate condition
3893+
Opcode = AArch64ISD::CCMN;
3894+
RHS = LHS.getOperand(1);
3895+
LHS = LHS.getOperand(0);
3896+
} else if (LHS.getOpcode() == ISD::SUB && CC == ISD::SETGE) {
3897+
// SUB(x, y) >= 0 -> CCMP(x, y) with appropriate condition
3898+
Opcode = AArch64ISD::CCMP;
3899+
RHS = LHS.getOperand(1);
3900+
LHS = LHS.getOperand(0);
3901+
}
38493902
}
38503903
if (Opcode == 0)
38513904
Opcode = AArch64ISD::CCMP;
@@ -3972,7 +4025,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
39724025
return emitComparison(LHS, RHS, CC, DL, DAG);
39734026
// Otherwise produce a ccmp.
39744027
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
3975-
DAG);
4028+
DAG, true);
39764029
}
39774030
assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
39784031

@@ -4251,7 +4304,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
42514304
}
42524305

42534306
if (!Cmp) {
4254-
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
4307+
Cmp = emitComparison(LHS, RHS, CC, DL, DAG, true);
42554308
AArch64CC = changeIntCCToAArch64CC(CC, RHS);
42564309
}
42574310
AArch64cc = getCondCode(DAG, AArch64CC);
@@ -7371,13 +7424,13 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
73717424
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
73727425

73737426
SDLoc DL(Op);
7374-
SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, VT);
73757427

7376-
// Generate SUBS & CSEL.
7377-
SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT),
7378-
Op.getOperand(0), DAG.getConstant(0, DL, VT));
7428+
// Generate CMP & CSEL.
7429+
SDValue Cmp = emitComparison(Op.getOperand(0), DAG.getConstant(0, DL, VT),
7430+
ISD::SETGE, DL, DAG, true);
7431+
SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, VT);
73797432
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
7380-
getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1));
7433+
getCondCode(DAG, AArch64CC::PL), Cmp);
73817434
}
73827435

73837436
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {

llvm/test/CodeGen/AArch64/alias_mask.ll

Lines changed: 88 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -393,70 +393,69 @@ entry:
393393
define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) {
394394
; CHECK-LABEL: whilewr_32_expand3:
395395
; CHECK: // %bb.0: // %entry
396-
; CHECK-NEXT: sub x10, x1, x0
396+
; CHECK-NEXT: subs x10, x1, x0
397397
; CHECK-NEXT: index z0.d, #0, #1
398-
; CHECK-NEXT: sub x9, x10, #61
399-
; CHECK-NEXT: subs x11, x10, #64
400-
; CHECK-NEXT: add x12, x10, #3
401-
; CHECK-NEXT: csel x9, x9, x11, mi
398+
; CHECK-NEXT: add x9, x10, #3
399+
; CHECK-NEXT: sub x12, x10, #61
400+
; CHECK-NEXT: csel x9, x9, x10, mi
402401
; CHECK-NEXT: asr x11, x9, #2
403-
; CHECK-NEXT: mov z1.d, z0.d
404402
; CHECK-NEXT: mov z2.d, z0.d
405403
; CHECK-NEXT: mov z3.d, z0.d
406-
; CHECK-NEXT: cmp x11, #1
407404
; CHECK-NEXT: mov z4.d, z0.d
405+
; CHECK-NEXT: cmp x11, #1
406+
; CHECK-NEXT: dup v1.2d, x11
408407
; CHECK-NEXT: mov z5.d, z0.d
409408
; CHECK-NEXT: cset w9, lt
410-
; CHECK-NEXT: cmp x10, #0
409+
; CHECK-NEXT: subs x10, x10, #64
411410
; CHECK-NEXT: mov z6.d, z0.d
412411
; CHECK-NEXT: csel x10, x12, x10, mi
413-
; CHECK-NEXT: dup v7.2d, x11
414-
; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc
412+
; CHECK-NEXT: mov z7.d, z0.d
413+
; CHECK-NEXT: add z2.d, z2.d, #12 // =0xc
415414
; CHECK-NEXT: asr x10, x10, #2
416-
; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa
417-
; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8
418-
; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6
419-
; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4
420-
; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2
415+
; CHECK-NEXT: add z3.d, z3.d, #10 // =0xa
416+
; CHECK-NEXT: add z4.d, z4.d, #8 // =0x8
417+
; CHECK-NEXT: add z5.d, z5.d, #6 // =0x6
418+
; CHECK-NEXT: add z6.d, z6.d, #4 // =0x4
419+
; CHECK-NEXT: cmhi v17.2d, v1.2d, v0.2d
421420
; CHECK-NEXT: dup v16.2d, x10
422-
; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d
423-
; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d
424-
; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d
425-
; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d
421+
; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2
422+
; CHECK-NEXT: cmhi v19.2d, v1.2d, v2.2d
423+
; CHECK-NEXT: cmhi v20.2d, v1.2d, v3.2d
424+
; CHECK-NEXT: cmhi v21.2d, v1.2d, v4.2d
426425
; CHECK-NEXT: cmp x10, #1
427-
; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d
426+
; CHECK-NEXT: cmhi v22.2d, v1.2d, v5.2d
428427
; CHECK-NEXT: cset w10, lt
429428
; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
430429
; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
431-
; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
432430
; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
433431
; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
434432
; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
435-
; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d
436-
; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d
437-
; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d
433+
; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
434+
; CHECK-NEXT: cmhi v23.2d, v16.2d, v6.2d
435+
; CHECK-NEXT: cmhi v24.2d, v16.2d, v7.2d
436+
; CHECK-NEXT: cmhi v6.2d, v1.2d, v6.2d
438437
; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d
439-
; CHECK-NEXT: cmhi v6.2d, v7.2d, v6.2d
440-
; CHECK-NEXT: cmhi v0.2d, v7.2d, v0.2d
441-
; CHECK-NEXT: uzp1 v7.4s, v21.4s, v20.4s
442-
; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
443-
; CHECK-NEXT: uzp1 v3.4s, v23.4s, v4.4s
444-
; CHECK-NEXT: uzp1 v4.4s, v18.4s, v24.4s
445-
; CHECK-NEXT: uzp1 v5.4s, v5.4s, v22.4s
446-
; CHECK-NEXT: uzp1 v1.4s, v1.4s, v16.4s
447-
; CHECK-NEXT: uzp1 v6.4s, v17.4s, v6.4s
438+
; CHECK-NEXT: cmhi v7.2d, v1.2d, v7.2d
439+
; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d
440+
; CHECK-NEXT: uzp1 v1.4s, v21.4s, v20.4s
441+
; CHECK-NEXT: uzp1 v3.4s, v4.4s, v3.4s
442+
; CHECK-NEXT: uzp1 v4.4s, v23.4s, v5.4s
443+
; CHECK-NEXT: uzp1 v5.4s, v18.4s, v24.4s
444+
; CHECK-NEXT: uzp1 v6.4s, v6.4s, v22.4s
445+
; CHECK-NEXT: uzp1 v2.4s, v2.4s, v16.4s
446+
; CHECK-NEXT: uzp1 v7.4s, v17.4s, v7.4s
448447
; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s
449-
; CHECK-NEXT: uzp1 v3.8h, v4.8h, v3.8h
450-
; CHECK-NEXT: uzp1 v1.8h, v2.8h, v1.8h
451-
; CHECK-NEXT: uzp1 v2.8h, v6.8h, v5.8h
452-
; CHECK-NEXT: uzp1 v0.8h, v7.8h, v0.8h
453-
; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b
454-
; CHECK-NEXT: uzp1 v0.16b, v2.16b, v0.16b
455-
; CHECK-NEXT: dup v3.16b, w10
456-
; CHECK-NEXT: dup v2.16b, w9
448+
; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h
449+
; CHECK-NEXT: uzp1 v2.8h, v3.8h, v2.8h
450+
; CHECK-NEXT: uzp1 v3.8h, v7.8h, v6.8h
451+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
452+
; CHECK-NEXT: uzp1 v1.16b, v4.16b, v2.16b
453+
; CHECK-NEXT: uzp1 v0.16b, v3.16b, v0.16b
454+
; CHECK-NEXT: dup v2.16b, w10
455+
; CHECK-NEXT: dup v3.16b, w9
457456
; CHECK-NEXT: adrp x9, .LCPI14_0
458-
; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
459-
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
457+
; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b
458+
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
460459
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_0]
461460
; CHECK-NEXT: shl v1.16b, v1.16b, #7
462461
; CHECK-NEXT: shl v0.16b, v0.16b, #7
@@ -470,8 +469,8 @@ define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) {
470469
; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b
471470
; CHECK-NEXT: addv h1, v1.8h
472471
; CHECK-NEXT: addv h0, v0.8h
473-
; CHECK-NEXT: str h1, [x8]
474-
; CHECK-NEXT: str h0, [x8, #2]
472+
; CHECK-NEXT: str h1, [x8, #2]
473+
; CHECK-NEXT: str h0, [x8]
475474
; CHECK-NEXT: ret
476475
entry:
477476
%0 = call <32 x i1> @llvm.loop.dependence.war.mask.v32i1(ptr %a, ptr %b, i64 4)
@@ -587,70 +586,69 @@ entry:
587586
define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) {
588587
; CHECK-LABEL: whilewr_64_expand4:
589588
; CHECK: // %bb.0: // %entry
590-
; CHECK-NEXT: sub x10, x1, x0
589+
; CHECK-NEXT: subs x10, x1, x0
591590
; CHECK-NEXT: index z0.d, #0, #1
592-
; CHECK-NEXT: sub x9, x10, #121
593-
; CHECK-NEXT: subs x11, x10, #128
594-
; CHECK-NEXT: add x12, x10, #7
595-
; CHECK-NEXT: csel x9, x9, x11, mi
591+
; CHECK-NEXT: add x9, x10, #7
592+
; CHECK-NEXT: sub x12, x10, #121
593+
; CHECK-NEXT: csel x9, x9, x10, mi
596594
; CHECK-NEXT: asr x11, x9, #3
597-
; CHECK-NEXT: mov z1.d, z0.d
598595
; CHECK-NEXT: mov z2.d, z0.d
599596
; CHECK-NEXT: mov z3.d, z0.d
600-
; CHECK-NEXT: cmp x11, #1
601597
; CHECK-NEXT: mov z4.d, z0.d
598+
; CHECK-NEXT: cmp x11, #1
599+
; CHECK-NEXT: dup v1.2d, x11
602600
; CHECK-NEXT: mov z5.d, z0.d
603601
; CHECK-NEXT: cset w9, lt
604-
; CHECK-NEXT: cmp x10, #0
602+
; CHECK-NEXT: subs x10, x10, #128
605603
; CHECK-NEXT: mov z6.d, z0.d
606604
; CHECK-NEXT: csel x10, x12, x10, mi
607-
; CHECK-NEXT: dup v7.2d, x11
608-
; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc
605+
; CHECK-NEXT: mov z7.d, z0.d
606+
; CHECK-NEXT: add z2.d, z2.d, #12 // =0xc
609607
; CHECK-NEXT: asr x10, x10, #3
610-
; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa
611-
; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8
612-
; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6
613-
; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4
614-
; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2
608+
; CHECK-NEXT: add z3.d, z3.d, #10 // =0xa
609+
; CHECK-NEXT: add z4.d, z4.d, #8 // =0x8
610+
; CHECK-NEXT: add z5.d, z5.d, #6 // =0x6
611+
; CHECK-NEXT: add z6.d, z6.d, #4 // =0x4
612+
; CHECK-NEXT: cmhi v17.2d, v1.2d, v0.2d
615613
; CHECK-NEXT: dup v16.2d, x10
616-
; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d
617-
; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d
618-
; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d
619-
; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d
614+
; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2
615+
; CHECK-NEXT: cmhi v19.2d, v1.2d, v2.2d
616+
; CHECK-NEXT: cmhi v20.2d, v1.2d, v3.2d
617+
; CHECK-NEXT: cmhi v21.2d, v1.2d, v4.2d
620618
; CHECK-NEXT: cmp x10, #1
621-
; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d
619+
; CHECK-NEXT: cmhi v22.2d, v1.2d, v5.2d
622620
; CHECK-NEXT: cset w10, lt
623621
; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d
624622
; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe
625-
; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
626623
; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
627624
; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
628625
; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
629-
; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d
630-
; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d
631-
; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d
626+
; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
627+
; CHECK-NEXT: cmhi v23.2d, v16.2d, v6.2d
628+
; CHECK-NEXT: cmhi v24.2d, v16.2d, v7.2d
629+
; CHECK-NEXT: cmhi v6.2d, v1.2d, v6.2d
632630
; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d
633-
; CHECK-NEXT: cmhi v6.2d, v7.2d, v6.2d
634-
; CHECK-NEXT: cmhi v0.2d, v7.2d, v0.2d
635-
; CHECK-NEXT: uzp1 v7.4s, v21.4s, v20.4s
636-
; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
637-
; CHECK-NEXT: uzp1 v3.4s, v23.4s, v4.4s
638-
; CHECK-NEXT: uzp1 v4.4s, v18.4s, v24.4s
639-
; CHECK-NEXT: uzp1 v5.4s, v5.4s, v22.4s
640-
; CHECK-NEXT: uzp1 v1.4s, v1.4s, v16.4s
641-
; CHECK-NEXT: uzp1 v6.4s, v17.4s, v6.4s
631+
; CHECK-NEXT: cmhi v7.2d, v1.2d, v7.2d
632+
; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d
633+
; CHECK-NEXT: uzp1 v1.4s, v21.4s, v20.4s
634+
; CHECK-NEXT: uzp1 v3.4s, v4.4s, v3.4s
635+
; CHECK-NEXT: uzp1 v4.4s, v23.4s, v5.4s
636+
; CHECK-NEXT: uzp1 v5.4s, v18.4s, v24.4s
637+
; CHECK-NEXT: uzp1 v6.4s, v6.4s, v22.4s
638+
; CHECK-NEXT: uzp1 v2.4s, v2.4s, v16.4s
639+
; CHECK-NEXT: uzp1 v7.4s, v17.4s, v7.4s
642640
; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s
643-
; CHECK-NEXT: uzp1 v3.8h, v4.8h, v3.8h
644-
; CHECK-NEXT: uzp1 v1.8h, v2.8h, v1.8h
645-
; CHECK-NEXT: uzp1 v2.8h, v6.8h, v5.8h
646-
; CHECK-NEXT: uzp1 v0.8h, v7.8h, v0.8h
647-
; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b
648-
; CHECK-NEXT: uzp1 v0.16b, v2.16b, v0.16b
649-
; CHECK-NEXT: dup v3.16b, w10
650-
; CHECK-NEXT: dup v2.16b, w9
641+
; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h
642+
; CHECK-NEXT: uzp1 v2.8h, v3.8h, v2.8h
643+
; CHECK-NEXT: uzp1 v3.8h, v7.8h, v6.8h
644+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
645+
; CHECK-NEXT: uzp1 v1.16b, v4.16b, v2.16b
646+
; CHECK-NEXT: uzp1 v0.16b, v3.16b, v0.16b
647+
; CHECK-NEXT: dup v2.16b, w10
648+
; CHECK-NEXT: dup v3.16b, w9
651649
; CHECK-NEXT: adrp x9, .LCPI18_0
652-
; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
653-
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
650+
; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b
651+
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
654652
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_0]
655653
; CHECK-NEXT: shl v1.16b, v1.16b, #7
656654
; CHECK-NEXT: shl v0.16b, v0.16b, #7
@@ -664,8 +662,8 @@ define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) {
664662
; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b
665663
; CHECK-NEXT: addv h1, v1.8h
666664
; CHECK-NEXT: addv h0, v0.8h
667-
; CHECK-NEXT: str h1, [x8]
668-
; CHECK-NEXT: str h0, [x8, #2]
665+
; CHECK-NEXT: str h1, [x8, #2]
666+
; CHECK-NEXT: str h0, [x8]
669667
; CHECK-NEXT: ret
670668
entry:
671669
%0 = call <32 x i1> @llvm.loop.dependence.war.mask.v32i1(ptr %a, ptr %b, i64 8)

llvm/test/CodeGen/AArch64/pr72777.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ define i64 @f(i64 %0, i64 %1) {
55
; CHECK-LABEL: f:
66
; CHECK: // %bb.0:
77
; CHECK-NEXT: orr x8, x1, #0x1
8-
; CHECK-NEXT: add x9, x0, x0
98
; CHECK-NEXT: mov x10, #-9223372036854775808 // =0x8000000000000000
10-
; CHECK-NEXT: adds x8, x8, x9
9+
; CHECK-NEXT: add x8, x8, x0
10+
; CHECK-NEXT: adds x8, x8, x0
1111
; CHECK-NEXT: lsl x9, x8, #1
1212
; CHECK-NEXT: cinv x10, x10, pl
1313
; CHECK-NEXT: cmp x8, x9, asr #1

0 commit comments

Comments
 (0)