Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@ define i32 @diamond_broadcast(i32* noalias nocapture %B, i32* noalias nocapture
; CHECK-LABEL: @diamond_broadcast(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: ret i32 0
;
Expand All @@ -35,13 +32,10 @@ define i32 @diamond_broadcast2(i32* noalias nocapture %B, i32* noalias nocapture
; CHECK-LABEL: @diamond_broadcast2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: ret i32 0
;
Expand All @@ -65,13 +59,10 @@ define i32 @diamond_broadcast3(i32* noalias nocapture %B, i32* noalias nocapture
; CHECK-LABEL: @diamond_broadcast3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[A:%.*]], align 4
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: ret i32 0
;
Expand Down
14 changes: 0 additions & 14 deletions llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,11 @@ target triple = "x86_64-unknown-linux-gnu"
define void @PR28457(double* noalias nocapture align 32 %q, double* noalias nocapture readonly align 32 %p) {
; SSE-LABEL: @PR28457(
; SSE-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 0
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[P]], i64 1
; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2
; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3
; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds double, double* [[P]], i64 4
; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds double, double* [[P]], i64 5
; SSE-NEXT: [[Q0:%.*]] = getelementptr inbounds double, double* [[Q:%.*]], i64 0
; SSE-NEXT: [[Q1:%.*]] = getelementptr inbounds double, double* [[Q]], i64 1
; SSE-NEXT: [[Q2:%.*]] = getelementptr inbounds double, double* [[Q]], i64 2
; SSE-NEXT: [[Q3:%.*]] = getelementptr inbounds double, double* [[Q]], i64 3
; SSE-NEXT: [[Q4:%.*]] = getelementptr inbounds double, double* [[Q]], i64 4
; SSE-NEXT: [[Q5:%.*]] = getelementptr inbounds double, double* [[Q]], i64 5
; SSE-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>*
; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 1.000000e+00, double 1.000000e+00>
Expand All @@ -43,17 +37,9 @@ define void @PR28457(double* noalias nocapture align 32 %q, double* noalias noca
;
; AVX-LABEL: @PR28457(
; AVX-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 0
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[P]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3
; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds double, double* [[P]], i64 4
; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds double, double* [[P]], i64 5
; AVX-NEXT: [[Q0:%.*]] = getelementptr inbounds double, double* [[Q:%.*]], i64 0
; AVX-NEXT: [[Q1:%.*]] = getelementptr inbounds double, double* [[Q]], i64 1
; AVX-NEXT: [[Q2:%.*]] = getelementptr inbounds double, double* [[Q]], i64 2
; AVX-NEXT: [[Q3:%.*]] = getelementptr inbounds double, double* [[Q]], i64 3
; AVX-NEXT: [[Q4:%.*]] = getelementptr inbounds double, double* [[Q]], i64 4
; AVX-NEXT: [[Q5:%.*]] = getelementptr inbounds double, double* [[Q]], i64 5
; AVX-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <4 x double>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 8
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
Expand Down
68 changes: 16 additions & 52 deletions llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,8 @@

define double @dot4f64(double* dereferenceable(32) %ptrx, double* dereferenceable(32) %ptry) {
; CHECK-LABEL: @dot4f64(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 2
; CHECK-NEXT: [[PTRX3:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 3
; CHECK-NEXT: [[PTRY3:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 3
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY]] to <2 x double>*
Expand Down Expand Up @@ -61,12 +57,8 @@ define double @dot4f64(double* dereferenceable(32) %ptrx, double* dereferenceabl

define float @dot4f32(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
; CHECK-LABEL: @dot4f32(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 2
; CHECK-NEXT: [[PTRX3:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 3
; CHECK-NEXT: [[PTRY3:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 3
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY]] to <2 x float>*
Expand Down Expand Up @@ -112,15 +104,9 @@ define float @dot4f32(float* dereferenceable(16) %ptrx, float* dereferenceable(1

define double @dot4f64_fast(double* dereferenceable(32) %ptrx, double* dereferenceable(32) %ptry) {
; CHECK-LABEL: @dot4f64_fast(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 2
; CHECK-NEXT: [[PTRX3:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 3
; CHECK-NEXT: [[PTRY3:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX]] to <4 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <4 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY]] to <4 x double>*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <4 x double>*
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP5]])
Expand Down Expand Up @@ -152,15 +138,9 @@ define double @dot4f64_fast(double* dereferenceable(32) %ptrx, double* dereferen

define float @dot4f32_fast(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
; CHECK-LABEL: @dot4f32_fast(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 2
; CHECK-NEXT: [[PTRX3:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 3
; CHECK-NEXT: [[PTRY3:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
Expand Down Expand Up @@ -198,8 +178,6 @@ define double @dot3f64(double* dereferenceable(32) %ptrx, double* dereferenceabl
; CHECK-LABEL: @dot3f64(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 2
; CHECK-NEXT: [[X0:%.*]] = load double, double* [[PTRX]], align 4
; CHECK-NEXT: [[Y0:%.*]] = load double, double* [[PTRY]], align 4
; CHECK-NEXT: [[MUL0:%.*]] = fmul double [[X0]], [[Y0]]
Expand Down Expand Up @@ -236,8 +214,6 @@ define float @dot3f32(float* dereferenceable(16) %ptrx, float* dereferenceable(1
; CHECK-LABEL: @dot3f32(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 2
; CHECK-NEXT: [[X0:%.*]] = load float, float* [[PTRX]], align 4
; CHECK-NEXT: [[Y0:%.*]] = load float, float* [[PTRY]], align 4
; CHECK-NEXT: [[MUL0:%.*]] = fmul float [[X0]], [[Y0]]
Expand Down Expand Up @@ -274,8 +250,6 @@ define double @dot3f64_fast(double* dereferenceable(32) %ptrx, double* dereferen
; CHECK-LABEL: @dot3f64_fast(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, double* [[PTRX]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, double* [[PTRY]], i64 2
; CHECK-NEXT: [[X0:%.*]] = load double, double* [[PTRX]], align 4
; CHECK-NEXT: [[Y0:%.*]] = load double, double* [[PTRY]], align 4
; CHECK-NEXT: [[MUL0:%.*]] = fmul double [[X0]], [[Y0]]
Expand Down Expand Up @@ -312,8 +286,6 @@ define float @dot3f32_fast(float* dereferenceable(16) %ptrx, float* dereferencea
; CHECK-LABEL: @dot3f32_fast(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, float* [[PTRX]], i64 2
; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, float* [[PTRY]], i64 2
; CHECK-NEXT: [[X0:%.*]] = load float, float* [[PTRX]], align 4
; CHECK-NEXT: [[Y0:%.*]] = load float, float* [[PTRY]], align 4
; CHECK-NEXT: [[MUL0:%.*]] = fmul float [[X0]], [[Y0]]
Expand Down Expand Up @@ -352,11 +324,9 @@ define float @dot3f32_fast(float* dereferenceable(16) %ptrx, float* dereferencea

define double @dot2f64(double* dereferenceable(16) %ptrx, double* dereferenceable(16) %ptry) {
; CHECK-LABEL: @dot2f64(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY]] to <2 x double>*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
Expand All @@ -378,11 +348,9 @@ define double @dot2f64(double* dereferenceable(16) %ptrx, double* dereferenceabl

define float @dot2f32(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
; CHECK-LABEL: @dot2f32(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX]] to <2 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY]] to <2 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
Expand All @@ -404,11 +372,9 @@ define float @dot2f32(float* dereferenceable(16) %ptrx, float* dereferenceable(1

define double @dot2f64_fast(double* dereferenceable(16) %ptrx, double* dereferenceable(16) %ptry) {
; CHECK-LABEL: @dot2f64_fast(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds double, double* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds double, double* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTRX:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY]] to <2 x double>*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[PTRY:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
Expand All @@ -430,11 +396,9 @@ define double @dot2f64_fast(double* dereferenceable(16) %ptrx, double* dereferen

define float @dot2f32_fast(float* dereferenceable(16) %ptrx, float* dereferenceable(16) %ptry) {
; CHECK-LABEL: @dot2f32_fast(
; CHECK-NEXT: [[PTRX1:%.*]] = getelementptr inbounds float, float* [[PTRX:%.*]], i64 1
; CHECK-NEXT: [[PTRY1:%.*]] = getelementptr inbounds float, float* [[PTRY:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX]] to <2 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTRX:%.*]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY]] to <2 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[PTRY:%.*]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@ define void @hoge(i64 %idx, <4 x i32>* %sink) {
; CHECK-LABEL: @hoge(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX:%.*]], i64 5
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 6
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 7
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: ret void
;
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@ define void @hoge(i64 %idx, <4 x i32>* %sink) {
; CHECK-LABEL: @hoge(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX:%.*]], i64 5
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 6
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 7
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [20 x [13 x i32]], [20 x [13 x i32]]* @array, i64 0, i64 [[IDX]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[SINK:%.*]], align 16
; CHECK-NEXT: ret void
;
bb:
Expand Down
17 changes: 3 additions & 14 deletions llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ define i32 @fn1() {
; CHECK-LABEL: @fn1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 11, i64 56>
Expand All @@ -36,24 +35,15 @@ declare float @llvm.powi.f32.i32(float, i32)
define void @fn2(i32* %a, i32* %b, float* %c) {
; CHECK-LABEL: @fn2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 1
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 1
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP5]], i32 [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[C]] to <4 x float>*
; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP7]], <4 x float>* [[TMP8]], align 4
; CHECK-NEXT: ret void
;
Expand Down Expand Up @@ -103,7 +93,6 @@ define void @externally_used_ptrs() {
; CHECK-LABEL: @externally_used_ptrs(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 56, i64 11>
Expand Down
24 changes: 4 additions & 20 deletions llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,7 @@ define void @fmaxnum_16f32() #0 {

define float @reduction_v4f32_fast(float* %p) {
; CHECK-LABEL: @reduction_v4f32_fast(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
Expand All @@ -363,10 +360,7 @@ define float @reduction_v4f32_fast(float* %p) {

define float @reduction_v4f32_nnan(float* %p) {
; CHECK-LABEL: @reduction_v4f32_nnan(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
Expand Down Expand Up @@ -415,14 +409,7 @@ define float @reduction_v4f32_not_fast(float* %p) {

define float @reduction_v8f32_fast(float* %p) {
; CHECK-LABEL: @reduction_v8f32_fast(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <8 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <8 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
Expand Down Expand Up @@ -469,10 +456,7 @@ define double @reduction_v2f64_fast(double* %p) {

define double @reduction_v4f64_fast(double* %p) {
; CHECK-LABEL: @reduction_v4f64_fast(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P]] to <4 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P:%.*]] to <4 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[TMP2]])
; CHECK-NEXT: ret double [[TMP3]]
Expand Down
24 changes: 4 additions & 20 deletions llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,7 @@ define void @fminnum_16f32() #0 {

define float @reduction_v4f32_fast(float* %p) {
; CHECK-LABEL: @reduction_v4f32_fast(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
Expand All @@ -363,10 +360,7 @@ define float @reduction_v4f32_fast(float* %p) {

define float @reduction_v4f32_nnan(float* %p) {
; CHECK-LABEL: @reduction_v4f32_nnan(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
Expand Down Expand Up @@ -415,14 +409,7 @@ define float @reduction_v4f32_wrong_fmf(float* %p) {

define float @reduction_v8f32_fast(float* %p) {
; CHECK-LABEL: @reduction_v8f32_fast(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4
; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5
; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6
; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <8 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <8 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> [[TMP2]])
; CHECK-NEXT: ret float [[TMP3]]
Expand Down Expand Up @@ -469,10 +456,7 @@ define double @reduction_v2f64_fast(double* %p) {

define double @reduction_v4f64_fast(double* %p) {
; CHECK-LABEL: @reduction_v4f64_fast(
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P]] to <4 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P:%.*]] to <4 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[TMP2]])
; CHECK-NEXT: ret double [[TMP3]]
Expand Down
9 changes: 3 additions & 6 deletions llvm/test/Transforms/SLPVectorizer/X86/funclet.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,13 @@ define void @test1(double* %a, double* %b, double* %c) #0 personality i32 (...)*
; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
; CHECK: catch:
; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [i8* null, i32 64, i8* null]
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[A]] to <2 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B]] to <2 x double>*
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP3]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP6]]) [ "funclet"(token [[TMP1]]) ]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[C]] to <2 x double>*
; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; CHECK-NEXT: catchret from [[TMP1]] to label [[TRY_CONT:%.*]]
; CHECK: try.cont:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ define void @test(i16 %0) {
; CHECK-NEXT: br label [[FOR_BODY92:%.*]]
; CHECK: for.body92:
; CHECK-NEXT: [[SUM_MVR_I:%.*]] = getelementptr i32, i32* undef, i32 0
; CHECK-NEXT: [[SUM_MVR_ABS_I:%.*]] = getelementptr i32, i32* undef, i32 2
; CHECK-NEXT: [[SUM_MVC_I:%.*]] = getelementptr i32, i32* undef, i32 1
; CHECK-NEXT: [[SUM_MVC_ABS_I:%.*]] = getelementptr i32, i32* undef, i32 3
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[SUM_MVR_I]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 8
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/Transforms/SLPVectorizer/X86/gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,11 @@ define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
; CHECK-LABEL: @foo1(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y:%.*]], i64 0, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X:%.*]], i64 0, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[Y]], i64 0, i32 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* [[X]], i64 0, i32 1
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>*
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, <2 x i32*> [[TMP6]], <2 x i64> <i64 16, i64 16>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32** [[TMP2]] to <2 x i32*>*
; CHECK-NEXT: store <2 x i32*> [[TMP7]], <2 x i32*>* [[TMP8]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP3]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, <2 x i32*> [[TMP4]], <2 x i64> <i64 16, i64 16>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32** [[TMP2]] to <2 x i32*>*
; CHECK-NEXT: store <2 x i32*> [[TMP5]], <2 x i32*>* [[TMP6]], align 8
; CHECK-NEXT: ret void
;
%1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0
Expand Down
266 changes: 10 additions & 256 deletions llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

Large diffs are not rendered by default.

58 changes: 6 additions & 52 deletions llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1245,40 +1245,19 @@ define i32 @smax_intrinsic_rdx_v8i32(i32* %p0) {
; SSE-NEXT: ret i32 [[M]]
;
; AVX-LABEL: @smax_intrinsic_rdx_v8i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 4
; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5
; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6
; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <8 x i32>*
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; AVX-NEXT: ret i32 [[TMP3]]
;
; AVX2-LABEL: @smax_intrinsic_rdx_v8i32(
; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; AVX2-NEXT: [[P4:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 4
; AVX2-NEXT: [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5
; AVX2-NEXT: [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6
; AVX2-NEXT: [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <8 x i32>*
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>*
; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
; AVX2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; AVX2-NEXT: ret i32 [[TMP3]]
;
; THRESH-LABEL: @smax_intrinsic_rdx_v8i32(
; THRESH-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; THRESH-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; THRESH-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; THRESH-NEXT: [[P4:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 4
; THRESH-NEXT: [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5
; THRESH-NEXT: [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6
; THRESH-NEXT: [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7
; THRESH-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <8 x i32>*
; THRESH-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <8 x i32>*
; THRESH-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
; THRESH-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
; THRESH-NEXT: ret i32 [[TMP3]]
Expand Down Expand Up @@ -1310,14 +1289,7 @@ define i32 @smax_intrinsic_rdx_v8i32(i32* %p0) {

define i16 @smin_intrinsic_rdx_v8i16(i16* %p0) {
; CHECK-LABEL: @smin_intrinsic_rdx_v8i16(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP2]])
; CHECK-NEXT: ret i16 [[TMP3]]
Expand Down Expand Up @@ -1362,10 +1334,7 @@ define i64 @umax_intrinsic_rdx_v4i64(i64* %p0) {
; DEFAULT-NEXT: ret i64 [[M]]
;
; THRESH-LABEL: @umax_intrinsic_rdx_v4i64(
; THRESH-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P0:%.*]], i64 1
; THRESH-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 2
; THRESH-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 3
; THRESH-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <4 x i64>*
; THRESH-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0:%.*]] to <4 x i64>*
; THRESH-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 4
; THRESH-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP2]])
; THRESH-NEXT: ret i64 [[TMP3]]
Expand All @@ -1385,22 +1354,7 @@ define i64 @umax_intrinsic_rdx_v4i64(i64* %p0) {

define i8 @umin_intrinsic_rdx_v16i8(i8* %p0) {
; CHECK-LABEL: @umin_intrinsic_rdx_v16i8(
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; CHECK-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8
; CHECK-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9
; CHECK-NEXT: [[PA:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10
; CHECK-NEXT: [[PB:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11
; CHECK-NEXT: [[PC:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12
; CHECK-NEXT: [[PD:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
; CHECK-NEXT: [[PE:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
; CHECK-NEXT: [[PF:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP2]])
; CHECK-NEXT: ret i8 [[TMP3]]
Expand Down
87 changes: 9 additions & 78 deletions llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ define i32 @add_red(float* %A, i32 %n) {
; ALL-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; ALL-NEXT: [[ADD28:%.*]] = or i64 [[MUL]], 1
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD28]]
; ALL-NEXT: [[ADD829:%.*]] = or i64 [[MUL]], 2
; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD829]]
; ALL-NEXT: [[ADD1330:%.*]] = or i64 [[MUL]], 3
; ALL-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]]
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; ALL-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
Expand Down Expand Up @@ -110,10 +104,7 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
; ALL-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
; ALL-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; ALL: for.body.lr.ph:
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
; ALL-NEXT: br label [[FOR_BODY:%.*]]
Expand All @@ -122,12 +113,6 @@ define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
; ALL-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; ALL-NEXT: [[ADD35:%.*]] = or i64 [[MUL]], 1
; ALL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
; ALL-NEXT: [[ADD1136:%.*]] = or i64 [[MUL]], 2
; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1136]]
; ALL-NEXT: [[ADD1737:%.*]] = or i64 [[MUL]], 3
; ALL-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1737]]
; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; ALL-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
Expand Down Expand Up @@ -216,14 +201,7 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
; ALL-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
; ALL-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; ALL: for.body.lr.ph:
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; ALL-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; ALL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; ALL-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[B]], i64 4
; ALL-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[B]], i64 5
; ALL-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds float, float* [[B]], i64 6
; ALL-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds float, float* [[B]], i64 7
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <8 x float>*
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <8 x float>*
; ALL-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
; ALL-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
; ALL-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
Expand All @@ -234,20 +212,6 @@ define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
; ALL-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
; ALL-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; ALL-NEXT: [[ADD80:%.*]] = or i64 [[MUL]], 1
; ALL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD80]]
; ALL-NEXT: [[ADD11:%.*]] = add nsw i64 [[MUL]], 2
; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD11]]
; ALL-NEXT: [[ADD17:%.*]] = add nsw i64 [[MUL]], 3
; ALL-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD17]]
; ALL-NEXT: [[ADD23:%.*]] = add nsw i64 [[MUL]], 4
; ALL-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD23]]
; ALL-NEXT: [[ADD29:%.*]] = add nsw i64 [[MUL]], 5
; ALL-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD29]]
; ALL-NEXT: [[ADD35:%.*]] = add nsw i64 [[MUL]], 6
; ALL-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD35]]
; ALL-NEXT: [[ADD41:%.*]] = add nsw i64 [[MUL]], 7
; ALL-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD41]]
; ALL-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
; ALL-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
; ALL-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
Expand Down Expand Up @@ -371,10 +335,7 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
; ALL-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
; ALL-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; ALL: for.body.lr.ph:
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; ALL-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; ALL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B]] to <4 x float>*
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
; ALL-NEXT: br label [[FOR_BODY:%.*]]
Expand All @@ -383,12 +344,6 @@ define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; ALL-NEXT: [[ADD638:%.*]] = or i64 [[MUL]], 1
; ALL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD638]]
; ALL-NEXT: [[ADD1239:%.*]] = or i64 [[MUL]], 2
; ALL-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1239]]
; ALL-NEXT: [[ADD1840:%.*]] = or i64 [[MUL]], 3
; ALL-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1840]]
; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
Expand Down Expand Up @@ -650,17 +605,14 @@ define void @store_red_double(double* noalias %A, double* noalias %B, double* no
; STORE-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0
; STORE-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; STORE: for.body.lr.ph:
; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1
; STORE-NEXT: [[TMP0:%.*]] = bitcast double* [[B]] to <2 x double>*
; STORE-NEXT: [[TMP0:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; STORE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
; STORE-NEXT: br label [[FOR_BODY:%.*]]
; STORE: for.body:
; STORE-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2
; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]]
; STORE-NEXT: [[ADD16:%.*]] = or i64 [[MUL]], 1
; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[ADD16]]
; STORE-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*
; STORE-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
; STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP4]]
Expand Down Expand Up @@ -768,23 +720,14 @@ define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i
; STORE-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0
; STORE-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
; STORE: for.body.lr.ph:
; STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; STORE-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
; STORE-NEXT: br label [[FOR_BODY:%.*]]
; STORE: for.body:
; STORE-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; STORE-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2
; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
; STORE-NEXT: [[ADD34:%.*]] = or i64 [[MUL]], 1
; STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD34]]
; STORE-NEXT: [[ADD1135:%.*]] = or i64 [[MUL]], 2
; STORE-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1135]]
; STORE-NEXT: [[ADD1736:%.*]] = or i64 [[MUL]], 3
; STORE-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1736]]
; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[B]] to <4 x float>*
; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
Expand Down Expand Up @@ -1386,10 +1329,7 @@ define i32 @reduction_result_used_in_phi(i32* nocapture readonly %data, i1 zeroe
; ALL-NEXT: entry:
; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
; ALL: bb:
; ALL-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
; ALL-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
; ALL-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>*
; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; ALL-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -1423,10 +1363,7 @@ define i32 @reduction_result_used_in_phi_loop(i32* nocapture readonly %data, i1
; ALL-NEXT: entry:
; ALL-NEXT: br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
; ALL: bb:
; ALL-NEXT: [[IDX_1:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 1
; ALL-NEXT: [[IDX_2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
; ALL-NEXT: [[IDX_3:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 3
; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA]] to <4 x i32>*
; ALL-NEXT: [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>*
; ALL-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; ALL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; ALL-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -1484,10 +1421,7 @@ bb.1:

define float @fadd_v4f32_fmf(float* %p) {
; ALL-LABEL: @fadd_v4f32_fmf(
; ALL-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
; ALL-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
; ALL-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; ALL-NEXT: [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
; ALL-NEXT: ret float [[TMP3]]
Expand All @@ -1511,10 +1445,7 @@ define float @fadd_v4f32_fmf(float* %p) {

define float @fadd_v4f32_fmf_intersect(float* %p) {
; ALL-LABEL: @fadd_v4f32_fmf_intersect(
; ALL-NEXT: [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
; ALL-NEXT: [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
; ALL-NEXT: [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; ALL-NEXT: [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
; ALL-NEXT: ret float [[TMP3]]
Expand Down
135 changes: 0 additions & 135 deletions llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,6 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-NEXT: [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
; SSE-NEXT: [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
; SSE-NEXT: [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
; SSE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1
; SSE-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1
; SSE-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1
; SSE-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1
; SSE-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1
; SSE-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2
; SSE-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2
; SSE-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2
; SSE-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2
; SSE-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2
; SSE-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3
; SSE-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3
; SSE-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3
; SSE-NEXT: [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3
; SSE-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3
; SSE-NEXT: [[TMP4:%.*]] = bitcast i8* [[C_ADDR_0352]] to <4 x i8>*
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1
; SSE-NEXT: [[TMP6:%.*]] = bitcast i8* [[D_ADDR_0353]] to <4 x i8>*
Expand All @@ -75,21 +60,6 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4
; SSE-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4
; SSE-NEXT: [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4
; SSE-NEXT: [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5
; SSE-NEXT: [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5
; SSE-NEXT: [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5
; SSE-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5
; SSE-NEXT: [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5
; SSE-NEXT: [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6
; SSE-NEXT: [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6
; SSE-NEXT: [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6
; SSE-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6
; SSE-NEXT: [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6
; SSE-NEXT: [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7
; SSE-NEXT: [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7
; SSE-NEXT: [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7
; SSE-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7
; SSE-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7
; SSE-NEXT: [[TMP18:%.*]] = bitcast i8* [[ARRAYIDX45]] to <4 x i8>*
; SSE-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
; SSE-NEXT: [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX47]] to <4 x i8>*
Expand All @@ -110,21 +80,6 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-NEXT: [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8
; SSE-NEXT: [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8
; SSE-NEXT: [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8
; SSE-NEXT: [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9
; SSE-NEXT: [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9
; SSE-NEXT: [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9
; SSE-NEXT: [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9
; SSE-NEXT: [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9
; SSE-NEXT: [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10
; SSE-NEXT: [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10
; SSE-NEXT: [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10
; SSE-NEXT: [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10
; SSE-NEXT: [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10
; SSE-NEXT: [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11
; SSE-NEXT: [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11
; SSE-NEXT: [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11
; SSE-NEXT: [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11
; SSE-NEXT: [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11
; SSE-NEXT: [[TMP32:%.*]] = bitcast i8* [[ARRAYIDX93]] to <4 x i8>*
; SSE-NEXT: [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1
; SSE-NEXT: [[TMP34:%.*]] = bitcast i8* [[ARRAYIDX95]] to <4 x i8>*
Expand All @@ -145,21 +100,6 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-NEXT: [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12
; SSE-NEXT: [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12
; SSE-NEXT: [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12
; SSE-NEXT: [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13
; SSE-NEXT: [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13
; SSE-NEXT: [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13
; SSE-NEXT: [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13
; SSE-NEXT: [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13
; SSE-NEXT: [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14
; SSE-NEXT: [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14
; SSE-NEXT: [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14
; SSE-NEXT: [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
; SSE-NEXT: [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
; SSE-NEXT: [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
; SSE-NEXT: [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
; SSE-NEXT: [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
; SSE-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
; SSE-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
; SSE-NEXT: [[TMP46:%.*]] = bitcast i8* [[ARRAYIDX141]] to <4 x i8>*
; SSE-NEXT: [[TMP47:%.*]] = load <4 x i8>, <4 x i8>* [[TMP46]], align 1
; SSE-NEXT: [[TMP48:%.*]] = bitcast i8* [[ARRAYIDX143]] to <4 x i8>*
Expand Down Expand Up @@ -198,81 +138,6 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; AVX512-NEXT: [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT: [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT: [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1
; AVX512-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1
; AVX512-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1
; AVX512-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1
; AVX512-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1
; AVX512-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2
; AVX512-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2
; AVX512-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2
; AVX512-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2
; AVX512-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2
; AVX512-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3
; AVX512-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3
; AVX512-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3
; AVX512-NEXT: [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3
; AVX512-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3
; AVX512-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4
; AVX512-NEXT: [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4
; AVX512-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4
; AVX512-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4
; AVX512-NEXT: [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4
; AVX512-NEXT: [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5
; AVX512-NEXT: [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5
; AVX512-NEXT: [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5
; AVX512-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5
; AVX512-NEXT: [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5
; AVX512-NEXT: [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6
; AVX512-NEXT: [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6
; AVX512-NEXT: [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6
; AVX512-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6
; AVX512-NEXT: [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6
; AVX512-NEXT: [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7
; AVX512-NEXT: [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7
; AVX512-NEXT: [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7
; AVX512-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7
; AVX512-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7
; AVX512-NEXT: [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8
; AVX512-NEXT: [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8
; AVX512-NEXT: [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8
; AVX512-NEXT: [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8
; AVX512-NEXT: [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8
; AVX512-NEXT: [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9
; AVX512-NEXT: [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9
; AVX512-NEXT: [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9
; AVX512-NEXT: [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9
; AVX512-NEXT: [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9
; AVX512-NEXT: [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10
; AVX512-NEXT: [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10
; AVX512-NEXT: [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10
; AVX512-NEXT: [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10
; AVX512-NEXT: [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10
; AVX512-NEXT: [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11
; AVX512-NEXT: [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11
; AVX512-NEXT: [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11
; AVX512-NEXT: [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11
; AVX512-NEXT: [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11
; AVX512-NEXT: [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12
; AVX512-NEXT: [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12
; AVX512-NEXT: [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12
; AVX512-NEXT: [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12
; AVX512-NEXT: [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12
; AVX512-NEXT: [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13
; AVX512-NEXT: [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13
; AVX512-NEXT: [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13
; AVX512-NEXT: [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13
; AVX512-NEXT: [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13
; AVX512-NEXT: [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14
; AVX512-NEXT: [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14
; AVX512-NEXT: [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14
; AVX512-NEXT: [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
; AVX512-NEXT: [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
; AVX512-NEXT: [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
; AVX512-NEXT: [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
; AVX512-NEXT: [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
; AVX512-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
; AVX512-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
; AVX512-NEXT: [[TMP1:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>*
; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; AVX512-NEXT: [[TMP3:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>*
Expand Down
1 change: 0 additions & 1 deletion llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* undef, align 4
; CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_SW:%.*]], %struct.sw* [[V:%.*]], i64 0, i32 0
; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_SW]], %struct.sw* [[V]], i64 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* undef, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16
Expand Down
20 changes: 1 addition & 19 deletions llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@ define void @julia_2xdouble([2 x double]* sret([2 x double]), [2 x double]*, [2
; CHECK-NEXT: top:
; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1
; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1
; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>*
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>*
Expand Down Expand Up @@ -53,16 +50,7 @@ define void @julia_4xfloat([4 x float]* sret([4 x float]), [4 x float]*, [4 x fl
; CHECK-NEXT: top:
; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1
; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1
; CHECK-NEXT: [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2
; CHECK-NEXT: [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2
; CHECK-NEXT: [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3
; CHECK-NEXT: [[PY3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 3
; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1
; CHECK-NEXT: [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2
; CHECK-NEXT: [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>*
Expand Down Expand Up @@ -128,10 +116,8 @@ define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x fl
; CHECK-NEXT: top:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
Expand Down Expand Up @@ -172,10 +158,8 @@ define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c
; CHECK-NEXT: top:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
Expand Down Expand Up @@ -267,13 +251,11 @@ define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudov
; CHECK-NEXT: top:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC:%.*]] undef, float [[TMP5]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT0]], float [[TMP6]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
Expand Down
5 changes: 1 addition & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@ define void @inst_size(i64* %a, <2 x i64> %b) {
; CHECK-LABEL: @inst_size(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[VAL:%.*]] = extractelement <2 x i64> [[B:%.*]], i32 0
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 1
; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2
; CHECK-NEXT: [[PTR4:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[A]] to <4 x i64>*
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[A:%.*]] to <4 x i64>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 4
; CHECK-NEXT: [[T41:%.*]] = icmp sgt i64 0, [[VAL]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i64> zeroinitializer, [[TMP1]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,10 @@ declare float @llvm.powi.f32.i32(float, i32)
define void @vec_powi_f32(float* %a, float* %c, i32 %P) {
; CHECK-LABEL: @vec_powi_f32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i32 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i32 3
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 1
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A]] to <4 x float>*
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP1]], i32 [[P:%.*]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[C]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
; CHECK-NEXT: ret void
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,13 @@ define void @jumble1(i32* noalias nocapture readonly %A, i32* noalias nocapture
; CHECK-LABEL: @jumble1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 11
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP1]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
; CHECK-NEXT: ret void
;
Expand Down Expand Up @@ -73,22 +64,13 @@ define void @jumble2(i32* noalias nocapture readonly %A, i32* noalias nocapture
; CHECK-LABEL: @jumble2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 10
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 11
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 13
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
; CHECK-NEXT: ret void
;
Expand Down
65 changes: 19 additions & 46 deletions llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,75 +44,48 @@ define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias noca
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 75
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX65:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT: [[ARRAYIDX66:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP26:%.*]], <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP14:%.*]], <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP14]], [[FOR_INC]] ]
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else:
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4
; CHECK-NEXT: [[CMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX12]], align 4
; CHECK-NEXT: [[CMP13:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT: br i1 [[CMP13]], label [[IF_THEN14:%.*]], label [[IF_ELSE27:%.*]]
; CHECK: if.then14:
; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX17]] to <4 x i32>*
; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX17]] to <4 x i32>*
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else27:
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4
; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[TMP14]], 0
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX28]], align 4
; CHECK-NEXT: [[CMP29:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT: br i1 [[CMP29]], label [[IF_THEN30:%.*]], label [[IF_ELSE43:%.*]]
; CHECK: if.then30:
; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX42:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX33]] to <4 x i32>*
; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX33]] to <4 x i32>*
; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else43:
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX44]], align 4
; CHECK-NEXT: [[CMP45:%.*]] = icmp eq i32 [[TMP20]], 0
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX44]], align 4
; CHECK-NEXT: [[CMP45:%.*]] = icmp eq i32 [[TMP11]], 0
; CHECK-NEXT: br i1 [[CMP45]], label [[IF_THEN46:%.*]], label [[FOR_INC]]
; CHECK: if.then46:
; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX52:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP22]]
; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP23]]
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>*
; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX49]] to <4 x i32>*
; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[TMP26]] = phi <4 x i32> [ [[TMP7]], [[IF_THEN]] ], [ [[TMP13]], [[IF_THEN14]] ], [ [[TMP19]], [[IF_THEN30]] ], [ [[SHUFFLE]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ]
; CHECK-NEXT: [[TMP14]] = phi <4 x i32> [ [[TMP4]], [[IF_THEN]] ], [ [[TMP7]], [[IF_THEN14]] ], [ [[TMP10]], [[IF_THEN30]] ], [ [[SHUFFLE]], [[IF_THEN46]] ], [ [[TMP2]], [[IF_ELSE43]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
Expand Down
15 changes: 0 additions & 15 deletions llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,8 @@
define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn, i32* noalias nocapture %out) {
; CHECK-LABEL: @jumbled-load(
; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
; CHECK-NEXT: [[INN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[INN:%.*]], i64 0
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 3
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i32, i32* [[INN_ADDR]], i64 1
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[INN_ADDR]] to <4 x i32>*
Expand Down Expand Up @@ -64,13 +55,7 @@ define i32 @jumbled-load(i32* noalias nocapture %in, i32* noalias nocapture %inn
define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias nocapture %out) {
; CHECK-LABEL: @jumbled-load-multiuses(
; CHECK-NEXT: [[IN_ADDR:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 0
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 3
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 1
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ define dso_local void @j() local_unnamed_addr {
; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** @b, align 8
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 12
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 5
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 13
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 14
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 15
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4
; CHECK-NEXT: [[CONV19:%.*]] = sitofp i32 [[TMP1]] to float
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <2 x i32>*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
; CHECK-LABEL: @PR16739_byref(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
; CHECK-NEXT: [[X0:%.*]] = load float, float* [[GEP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
Expand All @@ -80,7 +79,6 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenceable(16) %x) {
; CHECK-LABEL: @PR16739_byref_alt(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
Expand Down Expand Up @@ -139,13 +137,9 @@ define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceabl
define void @PR43578_prefer128(i32* %r, i64* %p, i64* %q) #0 {
; CHECK-LABEL: @PR43578_prefer128(
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 0
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, i64* [[Q:%.*]], i64 0
; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 1
; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 2
; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[Q0]] to <2 x i64>*
Expand Down
6 changes: 0 additions & 6 deletions llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
; CHECK-LABEL: @PR16739_byref(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
; CHECK-NEXT: [[X0:%.*]] = load float, float* [[GEP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
Expand All @@ -80,7 +79,6 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
define <4 x float> @PR16739_byref_alt(<4 x float>* nocapture readonly dereferenceable(16) %x) {
; CHECK-LABEL: @PR16739_byref_alt(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
Expand Down Expand Up @@ -139,13 +137,9 @@ define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceabl
define void @PR43578_prefer128(i32* %r, i64* %p, i64* %q) #0 {
; CHECK-LABEL: @PR43578_prefer128(
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 0
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 2
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 3
; CHECK-NEXT: [[Q0:%.*]] = getelementptr inbounds i64, i64* [[Q:%.*]], i64 0
; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 1
; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 2
; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, i64* [[Q]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 2
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[Q0]] to <2 x i64>*
Expand Down
Loading