136 changes: 77 additions & 59 deletions llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer,dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s

%struct.Ray = type { %struct.Vec, %struct.Vec }
%struct.Vec = type { double, double, double }
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

define void @main() {
%struct.Ray.5.11.53.113.119.137.149.185.329.389.416 = type { %struct.Vec.0.6.48.108.114.132.144.180.324.384.414, %struct.Vec.0.6.48.108.114.132.144.180.324.384.414 }
%struct.Vec.0.6.48.108.114.132.144.180.324.384.414 = type { double, double, double }

; Function Attrs: ssp uwtable
define void @main() #0 {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 undef, label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
Expand All @@ -15,7 +19,7 @@ define void @main() {
; CHECK: invoke.cont:
; CHECK-NEXT: br i1 undef, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
; CHECK: arrayctor.cont:
; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_113_119_137_149_185_329_389_416:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER:%.*]]
; CHECK: for.cond36.preheader:
; CHECK-NEXT: br i1 undef, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
Expand All @@ -24,14 +28,18 @@ define void @main() {
; CHECK: cond.true48.us:
; CHECK-NEXT: br i1 undef, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
; CHECK: cond.false66.us:
; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D
; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double poison, double 0xBFA5CC2D1960285F>, double [[ADD_I276_US]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> <double 0.000000e+00, double 1.000000e-01>, [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> <double 0.000000e+00, double undef>, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.400000e+02, double 1.400000e+02>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 5.000000e+01, double 5.200000e+01>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> <double 2.000000e-01, double 3.000000e-01>, [[TMP1]]
; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> <double poison, double undef>, double [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP5]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[TMP7]]
; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
; CHECK-NEXT: unreachable
; CHECK: cond.true63.us:
; CHECK-NEXT: unreachable
Expand All @@ -43,104 +51,114 @@ define void @main() {
entry:
br i1 undef, label %cond.true, label %cond.end

cond.true:
cond.true: ; preds = %entry
unreachable

cond.end:
cond.end: ; preds = %entry
br label %invoke.cont

invoke.cont:
invoke.cont: ; preds = %invoke.cont, %cond.end
br i1 undef, label %arrayctor.cont, label %invoke.cont

arrayctor.cont:
%agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 0, i32 1
%agg.tmp101211.sroa.0.0.idx = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 1, i32 0
%agg.tmp101211.sroa.1.8.idx390 = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 1, i32 1
arrayctor.cont: ; preds = %invoke.cont
%agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416, ptr undef, i64 0, i32 0, i32 1
%agg.tmp101211.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416, ptr undef, i64 0, i32 1, i32 0
%agg.tmp101211.sroa.1.8.idx390 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.416, ptr undef, i64 0, i32 1, i32 1
br label %for.cond36.preheader

for.cond36.preheader:
for.cond36.preheader: ; preds = %_Z5clampd.exit.1, %arrayctor.cont
br i1 undef, label %for.body42.lr.ph.us, label %_Z5clampd.exit.1

cond.false51.us:
cond.false51.us: ; preds = %for.body42.lr.ph.us
unreachable

cond.true48.us:
cond.true48.us: ; preds = %for.body42.lr.ph.us
br i1 undef, label %cond.true63.us, label %cond.false66.us

cond.false66.us:
%add.i276.us = fadd double 0.000000e+00, 0.000001e+00
cond.false66.us: ; preds = %cond.true48.us
%add.i276.us = fadd double 0.000000e+00, undef
%add.i264.us = fadd double %add.i276.us, 0.000000e+00
%add4.i267.us = fadd double 0.1e+00, 0xBFA5CC2D1960285F
%add4.i267.us = fadd double undef, 0xBFA5CC2D1960285F
%mul.i254.us = fmul double %add.i264.us, 1.400000e+02
%mul2.i256.us = fmul double %add4.i267.us, 1.400000e+02
%add.i243.us = fadd double %mul.i254.us, 5.000000e+01
%add4.i246.us = fadd double %mul2.i256.us, 5.200000e+01
%mul.i.i.us = fmul double 0.2e+00, %add.i264.us
%mul2.i.i.us = fmul double 0.3e+00, %add4.i267.us
%mul.i.i.us = fmul double undef, %add.i264.us
%mul2.i.i.us = fmul double undef, %add4.i267.us
store double %add.i243.us, ptr undef, align 8
store double %add4.i246.us, ptr %agg.tmp99208.sroa.1.8.idx388, align 8
store double %mul.i.i.us, ptr %agg.tmp101211.sroa.0.0.idx, align 8
store double %mul2.i.i.us, ptr %agg.tmp101211.sroa.1.8.idx390, align 8
unreachable

cond.true63.us:
cond.true63.us: ; preds = %cond.true48.us
unreachable

for.body42.lr.ph.us:
for.body42.lr.ph.us: ; preds = %for.cond36.preheader
br i1 undef, label %cond.true48.us, label %cond.false51.us

_Z5clampd.exit.1:
_Z5clampd.exit.1: ; preds = %for.cond36.preheader
br label %for.cond36.preheader
}

define void @test() {
; CHECK-LABEL: @test(

%struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601 = type { %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600, %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 }
%struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 = type { double, double, double }

define void @_Z8radianceRK3RayiPt() #0 {
; CHECK-LABEL: @_Z8radianceRK3RayiPt(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
; CHECK: if.then38:
; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double 6.000000e-01, double poison>, double 6.000000e-02, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> <double 5.000000e-01, double 8.000000e-01>, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> <double 2.400000e-02, double 0.000000e+00>, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> <double 9.000000e-01, double 9.100000e-01>, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> <double 9.200000e-01, double 9.300000e-01>, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> <double 0x3FEE147AE147AE14, double 0x3FEE666666666666>, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> <double 0x3FEEB851EB851EB8, double 0x3FEF0A3D70A3D70A>, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> <double 0x3FEF5C28F5C28F5C, double 0x3FEFAE147AE147AE>, [[TMP6]]
; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double undef, double poison>, double undef, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]]
; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8
; CHECK-NEXT: br label [[IF_THEN78]]
; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: if.then78:
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: ret void
;
entry:
br i1 undef, label %if.then78, label %if.then38

if.then38:
%mul.i.i790 = fmul double 0.0, 0.1
%mul3.i.i792 = fmul double 0.2, 0.3
%mul.i764 = fmul double 0.4, %mul3.i.i792
%mul4.i767 = fmul double 0.5, 0.6
if.then38: ; preds = %entry
%mul.i.i790 = fmul double undef, undef
%mul3.i.i792 = fmul double undef, undef
%mul.i764 = fmul double undef, %mul3.i.i792
%mul4.i767 = fmul double undef, undef
%sub.i768 = fsub double %mul.i764, %mul4.i767
%mul6.i770 = fmul double 0.7, %mul.i.i790
%mul9.i772 = fmul double 0.8, %mul3.i.i792
%mul6.i770 = fmul double undef, %mul.i.i790
%mul9.i772 = fmul double undef, %mul3.i.i792
%sub10.i773 = fsub double %mul6.i770, %mul9.i772
%mul.i736 = fmul double 0.9, %sub.i768
%mul2.i738 = fmul double 0.91, %sub10.i773
%mul.i727 = fmul double 0.92, %mul.i736
%mul2.i729 = fmul double 0.93, %mul2.i738
%add.i716 = fadd double 0.94, %mul.i727
%add4.i719 = fadd double 0.95, %mul2.i729
%add.i695 = fadd double 0.96, %add.i716
%add4.i698 = fadd double 0.97, %add4.i719
%mul.i.i679 = fmul double 0.98, %add.i695
%mul2.i.i680 = fmul double 0.99, %add4.i698
%agg.tmp74663.sroa.0.0.idx = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 1, i32 0
%mul.i736 = fmul double undef, %sub.i768
%mul2.i738 = fmul double undef, %sub10.i773
%mul.i727 = fmul double undef, %mul.i736
%mul2.i729 = fmul double undef, %mul2.i738
%add.i716 = fadd double undef, %mul.i727
%add4.i719 = fadd double undef, %mul2.i729
%add.i695 = fadd double undef, %add.i716
%add4.i698 = fadd double undef, %add4.i719
%mul.i.i679 = fmul double undef, %add.i695
%mul2.i.i680 = fmul double undef, %add4.i698
%agg.tmp74663.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601, ptr undef, i64 0, i32 1, i32 0
store double %mul.i.i679, ptr %agg.tmp74663.sroa.0.0.idx, align 8
%agg.tmp74663.sroa.1.8.idx943 = getelementptr inbounds %struct.Ray, ptr undef, i64 0, i32 1, i32 1
%agg.tmp74663.sroa.1.8.idx943 = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601, ptr undef, i64 0, i32 1, i32 1
store double %mul2.i.i680, ptr %agg.tmp74663.sroa.1.8.idx943, align 8
br label %if.then78
br label %return

if.then78: ; preds = %entry
br label %return

if.then78:
return: ; preds = %if.then78, %if.then38
ret void
}

attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@

define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @g(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <2 x i8> [[TMP2]]
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[X0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[Y1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP2]], [[TMP2]]
; CHECK-NEXT: ret <2 x i8> [[TMP3]]
;
%x0 = extractelement <2 x i8> %x, i32 0
%y1 = extractelement <2 x i8> %y, i32 1
Expand Down
9 changes: 6 additions & 3 deletions llvm/test/Transforms/SLPVectorizer/X86/extract-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@

define <2 x i8> @g(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @g(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i8> [[TMP1]], [[TMP1]]
; CHECK-NEXT: ret <2 x i8> [[TMP2]]
; CHECK-NEXT: [[X0:%.*]] = extractelement <2 x i8> [[X:%.*]], i32 0
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[X0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[Y1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i8> [[TMP2]], [[TMP2]]
; CHECK-NEXT: ret <2 x i8> [[TMP3]]
;
%x0 = extractelement <2 x i8> %x, i32 0
%y1 = extractelement <2 x i8> %y, i32 1
Expand Down
13 changes: 8 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/X86/extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ define void @fextr1(ptr %ptr) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, ptr undef, align 16
; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[LD]], <double 1.200000e+00, double 3.400000e+00>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: store <2 x double> [[SHUFFLE]], ptr [[PTR:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -48,9 +48,12 @@ define void @fextr2(ptr %ptr) {
; CHECK-LABEL: @fextr2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LD:%.*]] = load <4 x double>, ptr undef, align 32
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[LD]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[TMP0]], <double 5.500000e+00, double 6.600000e+00>
; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[PTR:%.*]], align 4
; CHECK-NEXT: [[V0:%.*]] = extractelement <4 x double> [[LD]], i32 0
; CHECK-NEXT: [[V1:%.*]] = extractelement <4 x double> [[LD]], i32 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[V0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 5.500000e+00, double 6.600000e+00>
; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[PTR:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

define float @multi_uses(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @multi_uses(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]]
; CHECK-NEXT: [[Y1:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[Y1]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[X:%.*]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
Expand Down
24 changes: 14 additions & 10 deletions llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,23 @@ define float @f_used_twice_in_tree(<2 x float> %x) {
; CHECK-NEXT: ret float [[ADD]]
;
; THRESH1-LABEL: @f_used_twice_in_tree(
; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]]
; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; THRESH1-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
; THRESH1-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]]
; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]]
; THRESH1-NEXT: ret float [[ADD]]
;
; THRESH2-LABEL: @f_used_twice_in_tree(
; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]]
; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
; THRESH2-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
; THRESH2-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[SHUFFLE]], [[X]]
; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], [[TMP5]]
; THRESH2-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,32 @@ define i32 @foo(i32 %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[A:%.*]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
; CHECK-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = mul <2 x i32> [[TMP4]], <i32 3, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[OP_RDX10]], 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = mul <2 x i32> [[TMP5]], <i32 3, i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
; CHECK-NEXT: [[OP_RDX11:%.*]] = add i32 [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[OP_RDX12:%.*]] = add i32 [[OP_RDX11]], 0
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX11]], [[BB1]] ], [ 0, [[BB2:%.*]] ]
; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[OP_RDX12]], [[BB1]] ], [ 0, [[BB2:%.*]] ]
; CHECK-NEXT: ret i32 0
; CHECK: bb4:
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP2]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
; CHECK-NEXT: [[OP_RDX8:%.*]] = add i32 [[TMP10]], 0
; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[OP_RDX8]], [[TMP3]]
; CHECK-NEXT: ret i32 [[OP_RDX9]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE8]]
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
; CHECK-NEXT: [[OP_RDX9:%.*]] = add i32 [[TMP11]], 0
; CHECK-NEXT: [[OP_RDX10:%.*]] = add i32 [[OP_RDX9]], [[TMP2]]
; CHECK-NEXT: ret i32 [[OP_RDX10]]
; CHECK: bb5:
; CHECK-NEXT: br label [[BB4:%.*]]
;
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
; PR41892
define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){
; CHECK-LABEL: @test_v4f32_v2f32_store(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> undef, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -220,8 +220,8 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3
; CHECK-NEXT: [[R021:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R021]], double [[R3]], i64 3
; CHECK-NEXT: ret <4 x double> [[R03]]
;
%a0 = extractelement <4 x double> %a, i64 0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
; PR41892
define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){
; CHECK-LABEL: @test_v4f32_v2f32_store(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> undef, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[F]], <4 x float> undef, <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: store <2 x float> [[TMP3]], ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -220,8 +220,8 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3
; CHECK-NEXT: [[R021:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[R021]], double [[R3]], i64 3
; CHECK-NEXT: ret <4 x double> [[R03]]
;
%a0 = extractelement <4 x double> %a, i64 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,22 @@

define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
; CHECK-LABEL: @simple_select(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[TMP6]]
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[TMP9]]
;
%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,10 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0
; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1
; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> <i32 0, i32 1>
; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2
; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3
; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> <i32 2, i32 3>
; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]]
; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]]
; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0
Expand Down Expand Up @@ -273,19 +273,37 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
; Unused insertelement
define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_no_users(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[C3]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[TMP10]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
; CHECK-NEXT: ret <4 x float> [[RD1]]
;
%c0 = extractelement <4 x i32> %c, i32 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,10 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
; MINTREESIZE-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
; MINTREESIZE-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0
; MINTREESIZE-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1
; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; MINTREESIZE-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> <i32 0, i32 1>
; MINTREESIZE-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2
; MINTREESIZE-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3
; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; MINTREESIZE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RD]], <4 x float> [[RD]], <2 x i32> <i32 2, i32 3>
; MINTREESIZE-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]]
; MINTREESIZE-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]]
; MINTREESIZE-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[Q4]], i32 0
Expand Down Expand Up @@ -307,19 +307,37 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
; Unused insertelement
define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_no_users(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> undef, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[C3]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <2 x i32> [[TMP10]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> undef, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
; CHECK-NEXT: ret <4 x float> [[RD1]]
;
%c0 = extractelement <4 x i32> %c, i32 0
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/X86/insertelement-postpone.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ define <4 x double> @test(ptr %p2, double %i1754, double %i1781, double %i1778)
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[I1778:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[I1781]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[I1772]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x double> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double poison>, double [[I1797]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: ret <4 x double> [[TMP7]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <4 x double> [[TMP3]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double poison>, double [[I1797]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x double> [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret <4 x double> [[TMP6]]
;
entry:
%i1771 = getelementptr inbounds double, ptr %p2, i64 54
Expand Down
22 changes: 12 additions & 10 deletions llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,43 @@ define void @foo() personality ptr @bar {
; CHECK: bb2.loopexit:
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP8:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP10:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ]
; CHECK-NEXT: ret void
; CHECK: bb3:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 0, i32 0, i32 poison) [ "deopt"() ]
; CHECK-NEXT: to label [[BB4:%.*]] unwind label [[BB10:%.*]]
; CHECK: bb4:
; CHECK-NEXT: br i1 poison, label [[BB11:%.*]], label [[BB5:%.*]]
; CHECK: bb5:
; CHECK-NEXT: br label [[BB7:%.*]]
; CHECK: bb6:
; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ <i32 0, i32 poison>, [[BB8:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ <i32 0, i32 poison>, [[BB8:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 1
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb7:
; CHECK-NEXT: [[LOCAL_5_84111:%.*]] = phi i32 [ poison, [[BB8]] ], [ poison, [[BB5]] ]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_5_84111]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = invoke i32 poison(ptr addrspace(1) nonnull poison, i32 poison, i32 poison, i32 poison) [ "deopt"() ]
; CHECK-NEXT: to label [[BB8]] unwind label [[BB12:%.*]]
; CHECK: bb8:
; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]]
; CHECK: bb9:
; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP8]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BB10]] ], [ [[TMP12:%.*]], [[BB12]] ]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP10]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb10:
; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
; CHECK-NEXT: [[TMP11]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: br label [[BB9]]
; CHECK: bb11:
; CHECK-NEXT: ret void
; CHECK: bb12:
; CHECK-NEXT: [[TMP10]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ]
; CHECK-NEXT: [[TMP12]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ]
; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 }
; CHECK-NEXT: cleanup
; CHECK-NEXT: br label [[BB9]]
Expand Down
254 changes: 132 additions & 122 deletions llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll

Large diffs are not rendered by default.

85 changes: 51 additions & 34 deletions llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll
Original file line number Diff line number Diff line change
Expand Up @@ -103,40 +103,57 @@ define i64 @test_3() #0 {
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB2:%.*]] ]
; CHECK-NEXT: [[VAL4:%.*]] = phi i32 [ undef, [[BB1]] ], [ undef, [[BB2]] ]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> poison, i32 [[VAL4]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP1]])
; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX7:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX8:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX9:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX10:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX11:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX12:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX13:%.*]] = mul i32 [[VAL4]], [[VAL4]]
; CHECK-NEXT: [[OP_RDX14:%.*]] = mul i32 [[OP_RDX]], [[OP_RDX1]]
; CHECK-NEXT: [[OP_RDX15:%.*]] = mul i32 [[OP_RDX2]], [[OP_RDX3]]
; CHECK-NEXT: [[OP_RDX16:%.*]] = mul i32 [[OP_RDX4]], [[OP_RDX5]]
; CHECK-NEXT: [[OP_RDX17:%.*]] = mul i32 [[OP_RDX6]], [[OP_RDX7]]
; CHECK-NEXT: [[OP_RDX18:%.*]] = mul i32 [[OP_RDX8]], [[OP_RDX9]]
; CHECK-NEXT: [[OP_RDX19:%.*]] = mul i32 [[OP_RDX10]], [[OP_RDX11]]
; CHECK-NEXT: [[OP_RDX20:%.*]] = mul i32 [[OP_RDX12]], [[OP_RDX13]]
; CHECK-NEXT: [[OP_RDX21:%.*]] = mul i32 [[OP_RDX14]], [[OP_RDX15]]
; CHECK-NEXT: [[OP_RDX22:%.*]] = mul i32 [[OP_RDX16]], [[OP_RDX17]]
; CHECK-NEXT: [[OP_RDX23:%.*]] = mul i32 [[OP_RDX18]], [[OP_RDX19]]
; CHECK-NEXT: [[OP_RDX24:%.*]] = mul i32 [[OP_RDX20]], [[VAL]]
; CHECK-NEXT: [[OP_RDX25:%.*]] = mul i32 [[OP_RDX21]], [[OP_RDX22]]
; CHECK-NEXT: [[OP_RDX26:%.*]] = mul i32 [[OP_RDX23]], [[OP_RDX24]]
; CHECK-NEXT: [[OP_RDX27:%.*]] = mul i32 [[OP_RDX25]], [[OP_RDX26]]
; CHECK-NEXT: [[VAL64:%.*]] = add i32 undef, [[OP_RDX27]]
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ undef, [[BB1]] ], [ poison, [[BB2:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <32 x i32> poison, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <32 x i32> [[TMP3]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <32 x i32> [[TMP4]], i32 [[TMP2]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <32 x i32> [[TMP5]], i32 [[TMP2]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <32 x i32> [[TMP6]], i32 [[TMP2]], i32 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <32 x i32> [[TMP7]], i32 [[TMP2]], i32 5
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <32 x i32> [[TMP8]], i32 [[TMP2]], i32 6
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <32 x i32> [[TMP9]], i32 [[TMP2]], i32 7
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <32 x i32> [[TMP10]], i32 [[TMP2]], i32 8
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <32 x i32> [[TMP11]], i32 [[TMP2]], i32 9
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 [[TMP2]], i32 10
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <32 x i32> [[TMP13]], i32 [[TMP2]], i32 11
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <32 x i32> [[TMP14]], i32 [[TMP2]], i32 12
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <32 x i32> [[TMP15]], i32 [[TMP2]], i32 13
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <32 x i32> [[TMP16]], i32 [[TMP2]], i32 14
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <32 x i32> [[TMP17]], i32 [[TMP2]], i32 15
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <32 x i32> [[TMP18]], i32 [[TMP2]], i32 16
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <32 x i32> [[TMP19]], i32 [[TMP2]], i32 17
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <32 x i32> [[TMP20]], i32 [[TMP2]], i32 18
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <32 x i32> [[TMP21]], i32 [[TMP2]], i32 19
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <32 x i32> [[TMP22]], i32 [[TMP2]], i32 20
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <32 x i32> [[TMP23]], i32 [[TMP2]], i32 21
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <32 x i32> [[TMP24]], i32 [[TMP2]], i32 22
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <32 x i32> [[TMP25]], i32 [[TMP2]], i32 23
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <32 x i32> [[TMP26]], i32 [[TMP2]], i32 24
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <32 x i32> [[TMP27]], i32 [[TMP2]], i32 25
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <32 x i32> [[TMP28]], i32 [[TMP2]], i32 26
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <32 x i32> [[TMP29]], i32 [[TMP2]], i32 27
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <32 x i32> [[TMP30]], i32 [[TMP2]], i32 28
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <32 x i32> [[TMP31]], i32 [[TMP2]], i32 29
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <32 x i32> [[TMP32]], i32 [[TMP2]], i32 30
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <32 x i32> [[TMP33]], i32 [[TMP2]], i32 31
; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP34]])
; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> [[TMP1]])
; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP35]], [[TMP36]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[TMP2]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[TMP2]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[TMP2]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[TMP2]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[TMP2]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX7:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]]
; CHECK-NEXT: [[OP_RDX8:%.*]] = mul i32 [[OP_RDX3]], [[OP_RDX4]]
; CHECK-NEXT: [[OP_RDX9:%.*]] = mul i32 [[OP_RDX5]], [[OP_RDX6]]
; CHECK-NEXT: [[OP_RDX10:%.*]] = mul i32 [[OP_RDX7]], [[OP_RDX8]]
; CHECK-NEXT: [[OP_RDX11:%.*]] = mul i32 [[OP_RDX9]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX12:%.*]] = mul i32 [[OP_RDX10]], [[OP_RDX11]]
; CHECK-NEXT: [[VAL64:%.*]] = add i32 undef, [[OP_RDX12]]
; CHECK-NEXT: [[VAL65:%.*]] = sext i32 [[VAL64]] to i64
; CHECK-NEXT: ret i64 [[VAL65]]
;
Expand Down
22 changes: 15 additions & 7 deletions llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,11 @@ define i32 @foo(ptr noalias nocapture %A, ptr noalias nocapture %B, float %T) {
; PR41892
define void @test_v4f32_v2f32_store(<4 x float> %f, ptr %p){
; CHECK-LABEL: @test_v4f32_v2f32_store(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[P:%.*]], align 4
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[X0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[X1]], i32 1
; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
%x0 = extractelement <4 x float> %f, i64 0
Expand Down Expand Up @@ -92,10 +95,13 @@ define void @test_v4f32_v2f32_splat_store(<4 x float> %f, ptr %p){

define void @test_v4f32_v3f32_store(<4 x float> %f, ptr %p){
; CHECK-LABEL: @test_v4f32_v3f32_store(
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[F:%.*]], i64 2
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1
; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[P]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[X0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[X1]], i32 1
; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[P]], align 4
; CHECK-NEXT: store float [[X2]], ptr [[P2]], align 4
; CHECK-NEXT: ret void
;
Expand Down Expand Up @@ -150,8 +156,10 @@ define void @test_v4f32_v4f32_store(<4 x float> %f, ptr %p){

define void @test_v4f32_v4f32_splat_store(<4 x float> %f, ptr %p){
; CHECK-LABEL: @test_v4f32_v4f32_splat_store(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[F:%.*]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[P:%.*]], align 4
; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: store <4 x float> [[SHUFFLE]], ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
%x0 = extractelement <4 x float> %f, i64 0
Expand Down
135 changes: 64 additions & 71 deletions llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll

Large diffs are not rendered by default.

135 changes: 64 additions & 71 deletions llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ define dso_local <4 x float> @foo(<4 x i32> %0) {
; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
Expand Down
114 changes: 65 additions & 49 deletions llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,19 @@ define i1 @logical_or_fcmp(<4 x float> %x) {
define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
; SSE-LABEL: @logical_and_icmp_diff_preds(
; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
; SSE-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
; SSE-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0
; SSE-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 3, i32 1>
; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP3]], i1 false
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X3]], i32 0
; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[X1]], i32 1
; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], zeroinitializer
; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP4]], i1 false
; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP4]], i1 false
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false
; SSE-NEXT: ret i1 [[S3]]
;
; AVX-LABEL: @logical_and_icmp_diff_preds(
Expand Down Expand Up @@ -184,13 +187,16 @@ define i1 @mixed_logical_icmp(<4 x i32> %x) {

define i1 @logical_and_icmp_subvec(<4 x i32> %x) {
; SSE-LABEL: @logical_and_icmp_subvec(
; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 1, i32 0>
; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
; SSE-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X1]], i32 0
; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[X0]], i32 1
; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], zeroinitializer
; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 0
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; SSE-NEXT: [[S1:%.*]] = select i1 [[TMP4]], i1 [[TMP3]], i1 false
; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; SSE-NEXT: [[S1:%.*]] = select i1 [[TMP5]], i1 [[TMP4]], i1 false
; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
; SSE-NEXT: ret i1 [[S2]]
;
Expand Down Expand Up @@ -222,13 +228,13 @@ define i1 @logical_and_icmp_subvec(<4 x i32> %x) {

define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
; CHECK-NEXT: ret i1 [[TMP6]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]])
; CHECK-NEXT: ret i1 [[TMP5]]
;
%x0 = extractelement <4 x i32> %x, i32 0
%x1 = extractelement <4 x i32> %x, i32 1
Expand All @@ -254,15 +260,15 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) {

define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
; CHECK-NEXT: call void @use1(i1 [[TMP5]])
; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
; CHECK-NEXT: ret i1 [[TMP7]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
; CHECK-NEXT: call void @use1(i1 [[TMP4]])
; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
; CHECK-NEXT: ret i1 [[TMP6]]
;
%x0 = extractelement <4 x i32> %x, i32 0
%x1 = extractelement <4 x i32> %x, i32 1
Expand Down Expand Up @@ -329,20 +335,27 @@ define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {

define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
; CHECK-NEXT: [[X0:%.*]] = extractelement <8 x i32> [[X:%.*]], i32 0
; CHECK-NEXT: [[X1:%.*]] = extractelement <8 x i32> [[X]], i32 1
; CHECK-NEXT: [[X2:%.*]] = extractelement <8 x i32> [[X]], i32 2
; CHECK-NEXT: [[X3:%.*]] = extractelement <8 x i32> [[X]], i32 3
; CHECK-NEXT: [[Y0:%.*]] = extractelement <8 x i32> [[Y:%.*]], i32 0
; CHECK-NEXT: [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1
; CHECK-NEXT: [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2
; CHECK-NEXT: [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[Y1]], i32 5
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[Y2]], i32 6
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y3]], i32 7
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP2]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]])
; CHECK-NEXT: ret i1 [[TMP9]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[X0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[X1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[X2]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[X3]], i32 3
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[Y1]], i32 5
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[Y2]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[Y3]], i32 7
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <8 x i1> [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP10]])
; CHECK-NEXT: ret i1 [[TMP11]]
;
%x0 = extractelement <8 x i32> %x, i32 0
%x1 = extractelement <8 x i32> %x, i32 1
Expand Down Expand Up @@ -373,18 +386,21 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
; SSE-LABEL: @logical_and_icmp_clamp_partial(
; SSE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 1, i32 0>
; SSE-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], <i32 42, i32 42>
; SSE-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 1
; SSE-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 0
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i32 1
; SSE-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[TMP5]], <i32 42, i32 42>
; SSE-NEXT: [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
; SSE-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; SSE-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP7]], i1 false
; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; SSE-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP8]], i1 [[C2]], i1 false
; SSE-NEXT: [[TMP9:%.*]] = freeze i1 [[OP_RDX]]
; SSE-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP9]], i1 [[OP_RDX1]], i1 false
; SSE-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
; SSE-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
; SSE-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP8]])
; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP9]], i1 [[TMP10]], i1 false
; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1
; SSE-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP11]], i1 [[C2]], i1 false
; SSE-NEXT: [[TMP12:%.*]] = freeze i1 [[OP_RDX]]
; SSE-NEXT: [[OP_RDX2:%.*]] = select i1 [[TMP12]], i1 [[OP_RDX1]], i1 false
; SSE-NEXT: ret i1 [[OP_RDX2]]
;
; AVX-LABEL: @logical_and_icmp_clamp_partial(
Expand Down
12 changes: 9 additions & 3 deletions llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@ define i64 @test() {
; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB2:%.*]] ], [ 0, [[BB1:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[BB2]] ], [ 0, [[BB1]] ]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]])
; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[TMP4]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[TMP4]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP4]], i32 3
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP4]], i32 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP44]], i32 [[TMP4]], i32 5
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 6
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP4]], i32 7
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP7]])
; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP8]], [[TMP4]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[TMP4]], [[TMP4]]
; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[OP_RDX]], [[OP_RDX1]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[OP_RDX2]], [[TMP]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ define void @fextr(ptr %ptr) {
; CHECK-NEXT: [[LD:%.*]] = load <8 x i16>, ptr undef, align 16
; CHECK-NEXT: br label [[T:%.*]]
; CHECK: t:
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[LD]], [[TMP0]]
; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[PTR:%.*]], align 2
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]]
; CHECK-NEXT: store <8 x i16> [[TMP0]], ptr [[PTR:%.*]], align 2
; CHECK-NEXT: ret void
;
; YAML: Pass: slp-vectorizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,23 @@

define void @test(ptr %p, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[E:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[F:%.*]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[C:%.*]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[D:%.*]], i32 3
; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP4]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], [[TMP9]]
; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[P:%.*]], align 4
; CHECK-NEXT: [[S1:%.*]] = sub i32 [[A:%.*]], 1
; CHECK-NEXT: [[S2:%.*]] = sub i32 [[B:%.*]], 1
; CHECK-NEXT: [[M1:%.*]] = mul i32 [[S1]], [[E:%.*]]
; CHECK-NEXT: [[M2:%.*]] = mul i32 [[S2]], [[F:%.*]]
; CHECK-NEXT: [[M3:%.*]] = mul i32 [[S1]], [[C:%.*]]
; CHECK-NEXT: [[M4:%.*]] = mul i32 [[S2]], [[D:%.*]]
; CHECK-NEXT: [[A1:%.*]] = add i32 [[A]], [[M1]]
; CHECK-NEXT: [[A2:%.*]] = add i32 [[B]], [[M2]]
; CHECK-NEXT: [[A3:%.*]] = add i32 [[C]], [[M3]]
; CHECK-NEXT: [[A4:%.*]] = add i32 [[D]], [[M4]]
; CHECK-NEXT: store i32 [[A1]], ptr [[P:%.*]], align 4
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 1
; CHECK-NEXT: store i32 [[A2]], ptr [[GEP]], align 4
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 2
; CHECK-NEXT: store i32 [[A3]], ptr [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 3
; CHECK-NEXT: store i32 [[A4]], ptr [[GEP2]], align 4
; CHECK-NEXT: ret void
;
%s1 = sub i32 %a, 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-14 | FileCheck %s
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-13 | FileCheck %s

define void @test(i1 %c, ptr %arg) {
; CHECK-LABEL: @test(
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ define void @test(ptr %a, ptr %b) {
; CHECK: for.body:
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr null, align 4
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP9]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd <4 x float> [[TMP12]], zeroinitializer
; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[RESULT]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP3]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[SHUFFLE]], [[TMP6]]
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[SHUFFLE]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[SHUFFLE2]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[SHUFFLE1]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP9]], zeroinitializer
; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[RESULT]], align 4
; CHECK-NEXT: br label [[FOR_BODY]]
;
entry:
Expand Down
20 changes: 11 additions & 9 deletions llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,24 @@ define void @foo() {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP10:%.*]], [[BB3:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8
; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]]
; CHECK: bb4:
; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double>
; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double
; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]]
; CHECK-NEXT: [[SUB1:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> <double poison, double poison, double undef, double undef>, double [[SUB1]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x double> [[TMP6]], [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <4 x double> [[TMP6]] to <4 x float>
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP2]], <4 x float> [[TMP8]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP3]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[CONV2]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]]
; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP10]] = phi <4 x float> [ [[TMP9]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
; CHECK-NEXT: br label [[BB2]]
;
entry:
Expand Down