diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2388375b8c2bb..a330acf340565 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10654,7 +10654,8 @@ class InstructionsCompatibilityAnalysis { static bool isSupportedOpcode(const unsigned Opcode) { return Opcode == Instruction::Add || Opcode == Instruction::LShr || Opcode == Instruction::Shl || Opcode == Instruction::SDiv || - Opcode == Instruction::UDiv; + Opcode == Instruction::UDiv || Opcode == Instruction::And || + Opcode == Instruction::Or || Opcode == Instruction::Xor; } /// Identifies the best candidate value, which represents main opcode @@ -10979,6 +10980,9 @@ class InstructionsCompatibilityAnalysis { case Instruction::Shl: case Instruction::SDiv: case Instruction::UDiv: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind); break; default: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll index c5f72f2258023..fded7a4f3f0c7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_reordering_undefs.ll @@ -4,21 +4,9 @@ define i32 @crash_reordering_undefs() { ; CHECK-LABEL: @crash_reordering_undefs( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OR0:%.*]] = or i64 undef, undef -; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i64 undef, [[OR0]] -; CHECK-NEXT: [[ADD0:%.*]] = select i1 [[CMP0]], i32 65536, i32 65537 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 undef, undef -; CHECK-NEXT: [[ADD2:%.*]] = select i1 [[CMP1]], i32 65536, i32 65537 -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 undef, undef -; CHECK-NEXT: [[ADD4:%.*]] = select i1 [[CMP2]], i32 65536, i32 65537 -; CHECK-NEXT: [[OR1:%.*]] = or i64 undef, undef -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 undef, [[OR1]] -; CHECK-NEXT: [[ADD9:%.*]] = select i1 [[CMP3]], i32 65536, i32 65537 +; CHECK-NEXT: [[ADD0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> splat (i32 65537)) ; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 undef, [[ADD0]] -; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[ADD2]], [[ADD4]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[OP_RDX]], [[OP_RDX1]] -; CHECK-NEXT: [[OP_RDX3:%.*]] = add i32 [[OP_RDX2]], [[ADD9]] -; CHECK-NEXT: ret i32 [[OP_RDX3]] +; CHECK-NEXT: ret i32 [[OP_RDX]] ; entry: %or0 = or i64 undef, undef diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll index 3ac0d01cf9a2c..13b050d904624 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll @@ -6,15 +6,15 @@ define i1 @test(i32 %g, i16 %d) { ; CHECK-SAME: i32 [[G:%.*]], i16 [[D:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[D]], 1 -; CHECK-NEXT: [[XOR_I_I:%.*]] = xor i32 [[G]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[G]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[XOR_I_I]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i8> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], splat (i8 -3) ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i8> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP12]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll b/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll index f07424f0d2934..43302f28d479e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll @@ -3,32 +3,7 @@ define i32 @test() { ; CHECK-LABEL: define i32 @test() { -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 0, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP25]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <24 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP5]], <64 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <64 x i32> [[TMP9]], <64 x i32> , <64 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <64 x i32> [[TMP10]], <64 x i32> [[TMP12]], <64 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <64 x i32> [[TMP13]], <64 x i32> [[TMP15]], <64 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <24 x i32> [[TMP6]], <24 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <64 x i32> [[TMP16]], <64 x i32> [[TMP15]], <64 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <64 x i32> [[TMP27]], <64 x i32> [[TMP28]], <64 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <64 x i32> zeroinitializer, [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <64 x i32> zeroinitializer, [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <64 x i1> [[TMP19]], <64 x i1> [[TMP20]], <64 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = zext <64 x i1> [[TMP21]] to <64 x i8> -; CHECK-NEXT: [[TMP23:%.*]] = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> [[TMP22]]) +; CHECK-NEXT: [[TMP23:%.*]] = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> ) ; CHECK-NEXT: [[TMP24:%.*]] = sext i8 [[TMP23]] to i32 ; CHECK-NEXT: ret i32 [[TMP24]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll index 1fedde4cc9fd7..3e9bd781bfea1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll @@ -3,12 +3,8 @@ define void @test() { ; CHECK-LABEL: define void @test() { -; CHECK-NEXT: [[XOR108_I_I_I:%.*]] = xor i64 0, 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x i64> , i64 [[XOR108_I_I_I]], i32 10 -; CHECK-NEXT: [[TMP2:%.*]] = lshr <12 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <12 x i64> [[TMP2]], <12 x i64> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i64> poison, i64 1, i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> , <16 x i64> [[TMP1]], <16 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1> ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll index 034fe82862950..c5442b7fb7f13 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll @@ -6,11 +6,10 @@ define void @foo() { ; CHECK-LABEL: define void @foo() { ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 0, i32 0 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> ; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 @@ -24,11 +23,10 @@ define void @foo() { ; ; FORCED-LABEL: define void @foo() { ; FORCED-NEXT: bb: -; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 0, i32 0 ; FORCED-NEXT: br label [[BB1:%.*]] ; FORCED: bb1: ; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] -; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]] +; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], zeroinitializer ; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> ; FORCED-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer ; FORCED-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll index 2612a21b9eedf..e8078adc27208 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll @@ -5,23 +5,22 @@ define i32 @test(i1 %cond) { ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: i1 [[COND:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[OR92:%.*]] = or i32 1, 0 ; CHECK-NEXT: br label %[[BB:.*]] ; CHECK: [[BB]]: -; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OR92:%.*]], %[[BB]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OR92]], %[[BB]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], %[[BB]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> , <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[P1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[OR92]] = or i32 1, 0 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> , i32 [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> , i32 [[OR92]], i32 0 -; CHECK-NEXT: [[TMP8]] = xor <2 x i32> [[TMP9]], [[TMP7]] -; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP6]], [[OR92]] +; CHECK-NEXT: [[TMP8]] = xor <2 x i32> [[TMP9]], ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[BB]] ; CHECK: [[EXIT]]: +; CHECK-NEXT: [[OP_RDX:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0 ; CHECK-NEXT: ret i32 [[OP_RDX]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll index 4a5dd2a63723e..b9f8390ac11f4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll @@ -8,42 +8,21 @@ define i16 @test() { ; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 0 ; CHECK-NEXT: [[CALL99_I:%.*]] = call i32 @llvm.bswap.i32(i32 0) ; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[CALL99_I]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = shl i32 0, 0 -; CHECK-NEXT: [[UNSCLEAR186_I:%.*]] = and i32 [[TMP6]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 0, 0 ; CHECK-NEXT: [[CALL7_I45:%.*]] = tail call i32 null(i32 0) ; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[CALL7_I45]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 0, 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> , i32 [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = and <2 x i32> [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <24 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <24 x i32> , <24 x i32> [[TMP16]], <24 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <24 x i32> [[TMP17]], <24 x i32> , <24 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <24 x i32> [[TMP18]], i32 [[UNSCLEAR186_I]], i32 10 -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <24 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <24 x i32> [[TMP19]], <24 x i32> [[TMP20]], <24 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <24 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <24 x i32> [[TMP21]], <24 x i32> [[TMP22]], <24 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <24 x i32> [[TMP23]], <24 x i32> , <24 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <24 x i32> [[TMP24]], <24 x i32> , <24 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <24 x i32> [[TMP25]], i32 [[UNSCLEAR186_I]], i32 11 -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <24 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <24 x i32> [[TMP26]], <24 x i32> [[TMP27]], <24 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <24 x i32> [[TMP24]], [[TMP28]] -; CHECK-NEXT: [[RDX_OP:%.*]] = shufflevector <24 x i1> [[TMP29]], <24 x i1> , <28 x i32> -; CHECK-NEXT: [[TMP30:%.*]] = bitcast <28 x i1> [[RDX_OP]] to i28 -; CHECK-NEXT: [[TMP31:%.*]] = call i28 @llvm.ctpop.i28(i28 [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = trunc i28 [[TMP31]] to i16 -; CHECK-NEXT: [[TMP33:%.*]] = call i4 @llvm.ctpop.i4(i4 -8) -; CHECK-NEXT: [[TMP34:%.*]] = zext i4 [[TMP33]] to i16 -; CHECK-NEXT: [[OP_RDX4:%.*]] = add i16 [[TMP34]], [[TMP32]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <28 x i32> , i32 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <28 x i32> [[TMP4]], i32 [[TMP2]], i32 5 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <28 x i32> [[TMP5]], <28 x i32> , <28 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <28 x i32> [[TMP6]], i32 [[TMP8]], i32 12 +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <28 x i32> [[TMP7]], <28 x i32> , <28 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <28 x i32> [[TMP16]], <28 x i32> , <28 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <28 x i32> [[TMP9]], <28 x i32> , <28 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = and <28 x i32> , [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <28 x i32> [[TMP11]], <28 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <32 x i32> , [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <32 x i1> [[TMP13]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP14]]) +; CHECK-NEXT: [[OP_RDX4:%.*]] = trunc i32 [[TMP15]] to i16 ; CHECK-NEXT: ret i16 [[OP_RDX4]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/minbitwidth-node-with-multi-users.ll index a7f8629213890..78708a200efd6 100644 --- a/llvm/test/Transforms/SLPVectorizer/minbitwidth-node-with-multi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/minbitwidth-node-with-multi-users.ll @@ -6,20 +6,12 @@ define void @test() { ; CHECK-LABEL: define void @test() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr null, align 2 -; CHECK-NEXT: [[TMP1:%.*]] = and i8 0, 1 ; CHECK-NEXT: [[TMP2:%.*]] = and i32 0, 0 ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> , i8 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> zeroinitializer, zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1> -; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP15]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i1> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 0, [[TMP14]] ; CHECK-NEXT: store i32 [[OP_RDX]], ptr null, align 4