49 changes: 25 additions & 24 deletions llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,18 @@ define dso_local void @test_unordered_splits(%struct.S* nocapture %p) local_unna
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[G10]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 3
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 4
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 5
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 6
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[G20]] to <4 x i32>*
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[G20]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 7
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX30]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP5]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP4]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[TMP7]], <8 x i32>* [[TMP8]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down Expand Up @@ -198,30 +198,31 @@ define dso_local void @test_cost_splits(%struct.S* nocapture %p) local_unnamed_a
; CHECK-NEXT: [[G21:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P3]], i32 0, i64 13
; CHECK-NEXT: [[G22:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P4]], i32 0, i64 14
; CHECK-NEXT: [[G23:%.*]] = getelementptr inbounds [16 x i32], [16 x i32]* [[P4]], i32 0, i64 15
; CHECK-NEXT: [[I1:%.*]] = load i32, i32* [[G10]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P:%.*]], i64 0, i32 0, i64 0
; CHECK-NEXT: store i32 [[I1]], i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[I3:%.*]] = load i32, i32* [[G11]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[G10]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 1
; CHECK-NEXT: store i32 [[I3]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[I5:%.*]] = load i32, i32* [[G12]], align 4
; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 2
; CHECK-NEXT: store i32 [[I5]], i32* [[ARRAYIDX16]], align 4
; CHECK-NEXT: [[I7:%.*]] = load i32, i32* [[G13]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[G12]] to <2 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 3
; CHECK-NEXT: store i32 [[I7]], i32* [[ARRAYIDX23]], align 4
; CHECK-NEXT: [[I9:%.*]] = load i32, i32* [[G20]], align 4
; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 4
; CHECK-NEXT: store i32 [[I9]], i32* [[ARRAYIDX30]], align 4
; CHECK-NEXT: [[I11:%.*]] = load i32, i32* [[G21]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[G20]] to <2 x i32>*
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 5
; CHECK-NEXT: store i32 [[I11]], i32* [[ARRAYIDX37]], align 4
; CHECK-NEXT: [[I13:%.*]] = load i32, i32* [[G22]], align 4
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 6
; CHECK-NEXT: store i32 [[I13]], i32* [[ARRAYIDX44]], align 4
; CHECK-NEXT: [[I15:%.*]] = load i32, i32* [[G23]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[G22]] to <2 x i32>*
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4
; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 0, i32 0, i64 7
; CHECK-NEXT: store i32 [[I15]], i32* [[ARRAYIDX51]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[ARRAYIDX2]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[TMP14]], <8 x i32>* [[TMP15]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand Down
14 changes: 8 additions & 6 deletions llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
Original file line number Diff line number Diff line change
Expand Up @@ -166,19 +166,21 @@ define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, fl
; CHECK-NEXT: [[DST_ADDR_022:%.*]] = phi float* [ [[ADD_PTR8:%.*]], [[FOR_BODY]] ], [ [[DST:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[SRC_ADDR_021:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[SRC:%.*]], [[ENTRY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[SRC_ADDR_021]], align 4
; CHECK-NEXT: store float [[TMP0]], float* [[DST_ADDR_022]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 4
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 1
; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX3]], align 4
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 2
; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 3
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX4]] to <2 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 3
; CHECK-NEXT: store float [[TMP3]], float* [[ARRAYIDX7]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[DST_ADDR_022]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP7]], <4 x float>* [[TMP8]], align 4
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, float* [[SRC_ADDR_021]], i64 [[I_023]]
; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds float, float* [[DST_ADDR_022]], i64 [[I_023]]
; CHECK-NEXT: [[INC]] = add i64 [[I_023]], 1
Expand Down