32 changes: 13 additions & 19 deletions llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -28,39 +28,33 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
ret <2 x i64> %tmp3.1
}

define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
define void @store_chain_v2i64(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @store_chain_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B:%.*]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8
; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[C:%.*]], align 8
; CHECK-NEXT: ret void
;
%a.0 = getelementptr i64, i64* %a, i64 0
%a.1 = getelementptr i64, i64* %a, i64 1
%b.0 = getelementptr i64, i64* %b, i64 0
%b.1 = getelementptr i64, i64* %b, i64 1
%c.0 = getelementptr i64, i64* %c, i64 0
%c.1 = getelementptr i64, i64* %c, i64 1
%v0.0 = load i64, i64* %a.0, align 8
%v0.1 = load i64, i64* %a.1, align 8
%v1.0 = load i64, i64* %b.0, align 8
%v1.1 = load i64, i64* %b.1, align 8
%a.1 = getelementptr i64, ptr %a, i64 1
%b.1 = getelementptr i64, ptr %b, i64 1
%c.1 = getelementptr i64, ptr %c, i64 1
%v0.0 = load i64, ptr %a, align 8
%v0.1 = load i64, ptr %a.1, align 8
%v1.0 = load i64, ptr %b, align 8
%v1.1 = load i64, ptr %b.1, align 8
%tmp0.0 = add i64 %v0.0, %v1.0
%tmp0.1 = add i64 %v0.1, %v1.1
%tmp1.0 = sub i64 %v0.0, %v1.0
%tmp1.1 = sub i64 %v0.1, %v1.1
%tmp2.0 = add i64 %tmp0.0, %tmp0.1
%tmp2.1 = add i64 %tmp1.0, %tmp1.1
store i64 %tmp2.0, i64* %c.0, align 8
store i64 %tmp2.1, i64* %c.1, align 8
store i64 %tmp2.0, ptr %c, align 8
store i64 %tmp2.1, ptr %c.1, align 8
ret void
}
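
The hunk above shows the core mechanical change of this opaque-pointer migration: with typed pointers, a vector access had to go through an explicit bitcast to the vector pointer type, while with opaque pointers the access type is carried by the load or store itself. A minimal sketch of the two spellings (an illustrative fragment with hypothetical value names, not part of the test):

; typed pointers (old): cast first, then access through <2 x i64>*
%vec.addr = bitcast i64* %a to <2 x i64>*
%vec      = load <2 x i64>, <2 x i64>* %vec.addr, align 8

; opaque pointers (new): the element type moves onto the instruction
%vec      = load <2 x i64>, ptr %a, align 8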

4 changes: 2 additions & 2 deletions llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
@@ -42,7 +42,7 @@ define dso_local void @l() local_unnamed_addr {
; CHECK-NEXT: [[I35:%.*]] = phi i32 [ [[I33]], [[BB25]] ]
; CHECK-NEXT: br label [[BB36:%.*]]
; CHECK: bb36:
; CHECK-NEXT: store i32 [[I35]], i32* @d, align 4
; CHECK-NEXT: store i32 [[I35]], ptr @d, align 4
; CHECK-NEXT: ret void
;
bb:
@@ -95,6 +95,6 @@ bb34: ; preds = %bb25
br label %bb36

bb36: ; preds = %bb34
store i32 %i35, i32* @d, align 4
store i32 %i35, ptr @d, align 4
ret void
}
45 changes: 20 additions & 25 deletions llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
@@ -15,16 +15,13 @@
; are not selected for vectorization. Instead we should vectorize with
; contiguous loads, from %a plus offsets 0 to 3, or offsets 1 to 4.

define void @s116_modified(float* %a) {
define void @s116_modified(ptr %a) {
; CHECK-LABEL: @s116_modified(
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 0
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
; CHECK-NEXT: [[LD0:%.*]] = load float, float* [[GEP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 1
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 3
; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP1]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[GEP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
@@ -33,28 +30,26 @@ define void @s116_modified(float* %a) {
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 undef, i32 2, i32 4>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
; CHECK-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 4
; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[A]], align 4
; CHECK-NEXT: ret void
;
%gep0 = getelementptr inbounds float, float* %a, i64 0
%gep1 = getelementptr inbounds float, float* %a, i64 1
%gep2 = getelementptr inbounds float, float* %a, i64 2
%gep3 = getelementptr inbounds float, float* %a, i64 3
%gep4 = getelementptr inbounds float, float* %a, i64 4
%ld0 = load float, float* %gep0
%ld1 = load float, float* %gep1
%ld2 = load float, float* %gep2
%ld3 = load float, float* %gep3
%ld4 = load float, float* %gep4
%gep1 = getelementptr inbounds float, ptr %a, i64 1
%gep2 = getelementptr inbounds float, ptr %a, i64 2
%gep3 = getelementptr inbounds float, ptr %a, i64 3
%gep4 = getelementptr inbounds float, ptr %a, i64 4
%ld0 = load float, ptr %a
%ld1 = load float, ptr %gep1
%ld2 = load float, ptr %gep2
%ld3 = load float, ptr %gep3
%ld4 = load float, ptr %gep4
%mul0 = fmul fast float %ld0, %ld1
%mul1 = fmul fast float %ld2, %ld1
%mul2 = fmul fast float %ld3, %ld2
%mul3 = fmul fast float %ld4, %ld3
store float %mul0, float* %gep0
store float %mul1, float* %gep1
store float %mul2, float* %gep2
store float %mul3, float* %gep3
store float %mul0, ptr %a
store float %mul1, ptr %gep1
store float %mul2, ptr %gep2
store float %mul3, ptr %gep3
ret void
}
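
The comment at the top of this test asks for contiguous loads from %a at offsets 0 to 3 or 1 to 4. Since the scalar body computes a[i] = a[i] * a[i+1] for i = 0..3, the fully contiguous form would be roughly the following (an illustrative sketch of that ideal shape, not the CHECK output above):

%lo  = load <4 x float>, ptr %a, align 4             ; a[0..3]
%gp1 = getelementptr inbounds float, ptr %a, i64 1
%hi  = load <4 x float>, ptr %gp1, align 4           ; a[1..4]
%mul = fmul fast <4 x float> %lo, %hi
store <4 x float> %mul, ptr %a, align 4              ; back into a[0..3]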

68 changes: 32 additions & 36 deletions llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s352.ll
@@ -29,34 +29,30 @@ define i32 @s352() {
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[DOT_115:%.*]] = phi float [ 0.000000e+00, [[PREHEADER]] ], [ [[ADD39:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA:%.*]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[ARRAYIDX]] to <2 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX6]] to <2 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA:%.*]], ptr @global_data, i64 0, i32 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 3, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[DOT_115]], [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[ADD]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP7]]
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[ARRAYIDX18]] to <2 x float>*
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[TMP8]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[ARRAYIDX21]] to <2 x float>*
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[TMP10]], align 4
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 0, i64 [[TMP7]]
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 3, i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX18]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[ARRAYIDX21]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x float> [[TMP9]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP12]], i32 0
; CHECK-NEXT: [[ADD23:%.*]] = fadd float [[ADD15]], [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP12]], i32 1
; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD23]], [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 0, i64 [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX34]], align 4
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], %struct.GlobalData* @global_data, i64 0, i32 3, i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX37]], align 4
; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 0, i64 [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [[STRUCT_GLOBALDATA]], ptr @global_data, i64 0, i32 3, i64 [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX37]], align 4
; CHECK-NEXT: [[MUL38:%.*]] = fmul float [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[ADD39]] = fadd float [[ADD31]], [[MUL38]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
@@ -81,38 +77,38 @@ for.cond.cleanup3:
for.body:
%indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for.body ]
%dot.115 = phi float [ 0.000000e+00, %preheader ], [ %add39, %for.body ]
%arrayidx = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
%arrayidx6 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
%1 = load float, float* %arrayidx6, align 4
%arrayidx = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%arrayidx6 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %indvars.iv
%1 = load float, ptr %arrayidx6, align 4
%mul7 = fmul float %0, %1
%add = fadd float %dot.115, %mul7
%2 = add nuw nsw i64 %indvars.iv, 1
%arrayidx10 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %2
%3 = load float, float* %arrayidx10, align 4
%arrayidx13 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %2
%4 = load float, float* %arrayidx13, align 4
%arrayidx10 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %2
%3 = load float, ptr %arrayidx10, align 4
%arrayidx13 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %2
%4 = load float, ptr %arrayidx13, align 4
%mul14 = fmul float %3, %4
%add15 = fadd float %add, %mul14
%5 = add nuw nsw i64 %indvars.iv, 2
%arrayidx18 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %5
%6 = load float, float* %arrayidx18, align 4
%arrayidx21 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %5
%7 = load float, float* %arrayidx21, align 4
%arrayidx18 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %5
%6 = load float, ptr %arrayidx18, align 4
%arrayidx21 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %5
%7 = load float, ptr %arrayidx21, align 4
%mul22 = fmul float %6, %7
%add23 = fadd float %add15, %mul22
%8 = add nuw nsw i64 %indvars.iv, 3
%arrayidx26 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %8
%9 = load float, float* %arrayidx26, align 4
%arrayidx29 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %8
%10 = load float, float* %arrayidx29, align 4
%arrayidx26 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %8
%9 = load float, ptr %arrayidx26, align 4
%arrayidx29 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %8
%10 = load float, ptr %arrayidx29, align 4
%mul30 = fmul float %9, %10
%add31 = fadd float %add23, %mul30
%11 = add nuw nsw i64 %indvars.iv, 4
%arrayidx34 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %11
%12 = load float, float* %arrayidx34, align 4
%arrayidx37 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %11
%13 = load float, float* %arrayidx37, align 4
%arrayidx34 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 0, i64 %11
%12 = load float, ptr %arrayidx34, align 4
%arrayidx37 = getelementptr inbounds %struct.GlobalData, ptr @global_data, i64 0, i32 3, i64 %11
%13 = load float, ptr %arrayidx37, align 4
%mul38 = fmul float %12, %13
%add39 = fadd float %add31, %mul38
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 5

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

142 changes: 69 additions & 73 deletions llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
@@ -8,55 +8,51 @@ target triple = "aarch64"
; There are no 'or' operations, so it can't be a bswap or
; other pattern that we are expecting the backend to handle.

define void @PR50256(i8* %a, i16* %b, i32 %n) {
define void @PR50256(ptr %a, ptr %b, i32 %n) {
; CHECK-LABEL: @PR50256(
; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 8
; CHECK-NEXT: [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i64 8
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[A]] to <8 x i8>*
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 8
; CHECK-NEXT: [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[A]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[B]] to <8 x i16>*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX_8]] to <8 x i8>*
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[TMP6]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_8]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[TMP7]] to <8 x i16>
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP8]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* [[TMP5]], align 2
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[ARRAYIDX3_8]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP9]], <8 x i16>* [[TMP10]], align 2
; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[B]], align 2
; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr [[ARRAYIDX3_8]], align 2
; CHECK-NEXT: ret void
;
%arrayidx.1 = getelementptr inbounds i8, i8* %a, i64 1
%arrayidx.2 = getelementptr inbounds i8, i8* %a, i64 2
%arrayidx.3 = getelementptr inbounds i8, i8* %a, i64 3
%arrayidx.4 = getelementptr inbounds i8, i8* %a, i64 4
%arrayidx.5 = getelementptr inbounds i8, i8* %a, i64 5
%arrayidx.7 = getelementptr inbounds i8, i8* %a, i64 7
%arrayidx.6 = getelementptr inbounds i8, i8* %a, i64 6
%arrayidx.8 = getelementptr inbounds i8, i8* %a, i64 8
%arrayidx.9 = getelementptr inbounds i8, i8* %a, i64 9
%arrayidx.10 = getelementptr inbounds i8, i8* %a, i64 10
%arrayidx.11 = getelementptr inbounds i8, i8* %a, i64 11
%arrayidx.12 = getelementptr inbounds i8, i8* %a, i64 12
%arrayidx.13 = getelementptr inbounds i8, i8* %a, i64 13
%arrayidx.14 = getelementptr inbounds i8, i8* %a, i64 14
%arrayidx.15 = getelementptr inbounds i8, i8* %a, i64 15
%i = load i8, i8* %a, align 1
%i1 = load i8, i8* %arrayidx.1, align 1
%i2 = load i8, i8* %arrayidx.2, align 1
%i3 = load i8, i8* %arrayidx.3, align 1
%i4 = load i8, i8* %arrayidx.4, align 1
%i5 = load i8, i8* %arrayidx.5, align 1
%i6 = load i8, i8* %arrayidx.6, align 1
%i7 = load i8, i8* %arrayidx.7, align 1
%i8 = load i8, i8* %arrayidx.8, align 1
%i9 = load i8, i8* %arrayidx.9, align 1
%i10 = load i8, i8* %arrayidx.10, align 1
%i11 = load i8, i8* %arrayidx.11, align 1
%i12 = load i8, i8* %arrayidx.12, align 1
%i13 = load i8, i8* %arrayidx.13, align 1
%i14 = load i8, i8* %arrayidx.14, align 1
%i15 = load i8, i8* %arrayidx.15, align 1
%arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 1
%arrayidx.2 = getelementptr inbounds i8, ptr %a, i64 2
%arrayidx.3 = getelementptr inbounds i8, ptr %a, i64 3
%arrayidx.4 = getelementptr inbounds i8, ptr %a, i64 4
%arrayidx.5 = getelementptr inbounds i8, ptr %a, i64 5
%arrayidx.7 = getelementptr inbounds i8, ptr %a, i64 7
%arrayidx.6 = getelementptr inbounds i8, ptr %a, i64 6
%arrayidx.8 = getelementptr inbounds i8, ptr %a, i64 8
%arrayidx.9 = getelementptr inbounds i8, ptr %a, i64 9
%arrayidx.10 = getelementptr inbounds i8, ptr %a, i64 10
%arrayidx.11 = getelementptr inbounds i8, ptr %a, i64 11
%arrayidx.12 = getelementptr inbounds i8, ptr %a, i64 12
%arrayidx.13 = getelementptr inbounds i8, ptr %a, i64 13
%arrayidx.14 = getelementptr inbounds i8, ptr %a, i64 14
%arrayidx.15 = getelementptr inbounds i8, ptr %a, i64 15
%i = load i8, ptr %a, align 1
%i1 = load i8, ptr %arrayidx.1, align 1
%i2 = load i8, ptr %arrayidx.2, align 1
%i3 = load i8, ptr %arrayidx.3, align 1
%i4 = load i8, ptr %arrayidx.4, align 1
%i5 = load i8, ptr %arrayidx.5, align 1
%i6 = load i8, ptr %arrayidx.6, align 1
%i7 = load i8, ptr %arrayidx.7, align 1
%i8 = load i8, ptr %arrayidx.8, align 1
%i9 = load i8, ptr %arrayidx.9, align 1
%i10 = load i8, ptr %arrayidx.10, align 1
%i11 = load i8, ptr %arrayidx.11, align 1
%i12 = load i8, ptr %arrayidx.12, align 1
%i13 = load i8, ptr %arrayidx.13, align 1
%i14 = load i8, ptr %arrayidx.14, align 1
%i15 = load i8, ptr %arrayidx.15, align 1
%conv5 = zext i8 %i to i16
%conv5.1 = zext i8 %i1 to i16
%conv5.2 = zext i8 %i2 to i16
@@ -89,36 +85,36 @@ define void @PR50256(i8* %a, i16* %b, i32 %n) {
%shl.13 = shl nuw i16 %conv5.13, 8
%shl.14 = shl nuw i16 %conv5.14, 8
%shl.15 = shl nuw i16 %conv5.15, 8
%arrayidx3.1 = getelementptr inbounds i16, i16* %b, i64 1
%arrayidx3.2 = getelementptr inbounds i16, i16* %b, i64 2
%arrayidx3.3 = getelementptr inbounds i16, i16* %b, i64 3
%arrayidx3.4 = getelementptr inbounds i16, i16* %b, i64 4
%arrayidx3.5 = getelementptr inbounds i16, i16* %b, i64 5
%arrayidx3.6 = getelementptr inbounds i16, i16* %b, i64 6
%arrayidx3.7 = getelementptr inbounds i16, i16* %b, i64 7
%arrayidx3.8 = getelementptr inbounds i16, i16* %b, i64 8
%arrayidx3.9 = getelementptr inbounds i16, i16* %b, i64 9
%arrayidx3.10 = getelementptr inbounds i16, i16* %b, i64 10
%arrayidx3.11 = getelementptr inbounds i16, i16* %b, i64 11
%arrayidx3.12 = getelementptr inbounds i16, i16* %b, i64 12
%arrayidx3.13 = getelementptr inbounds i16, i16* %b, i64 13
%arrayidx3.14 = getelementptr inbounds i16, i16* %b, i64 14
%arrayidx3.15 = getelementptr inbounds i16, i16* %b, i64 15
store i16 %shl, i16* %b, align 2
store i16 %shl.1, i16* %arrayidx3.1, align 2
store i16 %shl.2, i16* %arrayidx3.2, align 2
store i16 %shl.3, i16* %arrayidx3.3, align 2
store i16 %shl.4, i16* %arrayidx3.4, align 2
store i16 %shl.5, i16* %arrayidx3.5, align 2
store i16 %shl.6, i16* %arrayidx3.6, align 2
store i16 %shl.7, i16* %arrayidx3.7, align 2
store i16 %shl.8, i16* %arrayidx3.8, align 2
store i16 %shl.9, i16* %arrayidx3.9, align 2
store i16 %shl.10, i16* %arrayidx3.10, align 2
store i16 %shl.11, i16* %arrayidx3.11, align 2
store i16 %shl.12, i16* %arrayidx3.12, align 2
store i16 %shl.13, i16* %arrayidx3.13, align 2
store i16 %shl.14, i16* %arrayidx3.14, align 2
store i16 %shl.15, i16* %arrayidx3.15, align 2
%arrayidx3.1 = getelementptr inbounds i16, ptr %b, i64 1
%arrayidx3.2 = getelementptr inbounds i16, ptr %b, i64 2
%arrayidx3.3 = getelementptr inbounds i16, ptr %b, i64 3
%arrayidx3.4 = getelementptr inbounds i16, ptr %b, i64 4
%arrayidx3.5 = getelementptr inbounds i16, ptr %b, i64 5
%arrayidx3.6 = getelementptr inbounds i16, ptr %b, i64 6
%arrayidx3.7 = getelementptr inbounds i16, ptr %b, i64 7
%arrayidx3.8 = getelementptr inbounds i16, ptr %b, i64 8
%arrayidx3.9 = getelementptr inbounds i16, ptr %b, i64 9
%arrayidx3.10 = getelementptr inbounds i16, ptr %b, i64 10
%arrayidx3.11 = getelementptr inbounds i16, ptr %b, i64 11
%arrayidx3.12 = getelementptr inbounds i16, ptr %b, i64 12
%arrayidx3.13 = getelementptr inbounds i16, ptr %b, i64 13
%arrayidx3.14 = getelementptr inbounds i16, ptr %b, i64 14
%arrayidx3.15 = getelementptr inbounds i16, ptr %b, i64 15
store i16 %shl, ptr %b, align 2
store i16 %shl.1, ptr %arrayidx3.1, align 2
store i16 %shl.2, ptr %arrayidx3.2, align 2
store i16 %shl.3, ptr %arrayidx3.3, align 2
store i16 %shl.4, ptr %arrayidx3.4, align 2
store i16 %shl.5, ptr %arrayidx3.5, align 2
store i16 %shl.6, ptr %arrayidx3.6, align 2
store i16 %shl.7, ptr %arrayidx3.7, align 2
store i16 %shl.8, ptr %arrayidx3.8, align 2
store i16 %shl.9, ptr %arrayidx3.9, align 2
store i16 %shl.10, ptr %arrayidx3.10, align 2
store i16 %shl.11, ptr %arrayidx3.11, align 2
store i16 %shl.12, ptr %arrayidx3.12, align 2
store i16 %shl.13, ptr %arrayidx3.13, align 2
store i16 %shl.14, ptr %arrayidx3.14, align 2
store i16 %shl.15, ptr %arrayidx3.15, align 2
ret void
}
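
As the comment at the top of this test notes, there is no 'or' in the chain, so these widened shifts cannot be mistaken for a load-combine/bswap idiom that the backend would match instead. For contrast, the kind of pattern that comment alludes to merges shifted bytes with 'or', roughly (a hypothetical fragment, not from this test):

%b0.ext = zext i8 %b0 to i16
%b1.ext = zext i8 %b1 to i16
%hi     = shl nuw i16 %b0.ext, 8
%word   = or i16 %hi, %b1.ext       ; two bytes combined into one i16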
@@ -9,162 +9,162 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
define void @slp_scev_assert(i32 %idx, i64 %tmp3) #0 {
; CHECK-LABEL: @slp_scev_assert(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = addrspacecast i8 addrspace(5)* undef to i8*
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* undef, i32 [[IDX:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP]], i64 [[TMP3:%.*]]
; CHECK-NEXT: store i8 0, i8 addrspace(5)* [[TMP2]], align 1
; CHECK-NEXT: store i8 0, i8* [[TMP4]], align 1
; CHECK-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(5) undef to ptr
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(5) undef, i32 [[IDX:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 [[TMP3:%.*]]
; CHECK-NEXT: store i8 0, ptr addrspace(5) [[TMP2]], align 1
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT: ret void
;
bb:
%tmp = addrspacecast i8 addrspace(5)* undef to i8*
%tmp2 = getelementptr inbounds i8, i8 addrspace(5)* undef, i32 %idx
%tmp4 = getelementptr inbounds i8, i8* %tmp, i64 %tmp3
store i8 0, i8 addrspace(5)* %tmp2
store i8 0, i8* %tmp4
%tmp = addrspacecast ptr addrspace(5) undef to ptr
%tmp2 = getelementptr inbounds i8, ptr addrspace(5) undef, i32 %idx
%tmp4 = getelementptr inbounds i8, ptr %tmp, i64 %tmp3
store i8 0, ptr addrspace(5) %tmp2
store i8 0, ptr %tmp4
ret void
}

define void @multi_as_reduction_different_sized(i32 addrspace(3)* %lds, i32 %idx0, i64 %idx1) #0 {
define void @multi_as_reduction_different_sized(ptr addrspace(3) %lds, i32 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_different_sized(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast i32 addrspace(3)* [[LDS:%.*]] to i32*
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
; CHECK-NEXT: [[ADD0:%.*]] = add i32 [[IDX0:%.*]], 2
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT: [[LDS_1:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* [[LDS]], i32 [[ADD0]]
; CHECK-NEXT: [[FLAT_1:%.*]] = getelementptr inbounds i32, i32* [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT: [[LOAD_LDS_0:%.*]] = load i32, i32 addrspace(3)* [[LDS]], align 4
; CHECK-NEXT: [[LOAD_LDS_1:%.*]] = load i32, i32 addrspace(3)* [[LDS_1]], align 4
; CHECK-NEXT: [[LOAD_FLAT_0:%.*]] = load i32, i32* [[FLAT]], align 4
; CHECK-NEXT: [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
; CHECK-NEXT: [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i32 [[ADD0]]
; CHECK-NEXT: [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT: [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
; CHECK-NEXT: [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
; CHECK-NEXT: [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT: [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT: [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
; CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
; CHECK-NEXT: store i32 [[SUB0]], i32* undef, align 4
; CHECK-NEXT: store i32 [[SUB1]], i32* undef, align 4
; CHECK-NEXT: store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT: store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT: ret void
;
bb:
%flat = addrspacecast i32 addrspace(3)* %lds to i32*
%flat = addrspacecast ptr addrspace(3) %lds to ptr
%add0 = add i32 %idx0, 2
%add1 = add i64 %idx1, 1

%lds.1 = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 %add0
%flat.1 = getelementptr inbounds i32, i32* %flat, i64 %add1
%lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 %add0
%flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

%load.lds.0 = load i32, i32 addrspace(3)* %lds, align 4
%load.lds.1 = load i32, i32 addrspace(3)* %lds.1, align 4
%load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
%load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4

%load.flat.0 = load i32, i32* %flat, align 4
%load.flat.1 = load i32, i32* %flat.1, align 4
%load.flat.0 = load i32, ptr %flat, align 4
%load.flat.1 = load i32, ptr %flat.1, align 4

%sub0 = sub i32 %load.flat.0, %load.lds.0
%sub1 = sub i32 %load.flat.1, %load.lds.1

store i32 %sub0, i32* undef
store i32 %sub1, i32* undef
store i32 %sub0, ptr undef
store i32 %sub1, ptr undef
ret void
}

; This should vectorize if using getUnderlyingObject
define void @multi_as_reduction_same_size(i32 addrspace(1)* %global, i64 %idx0, i64 %idx1) #0 {
define void @multi_as_reduction_same_size(ptr addrspace(1) %global, i64 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_same_size(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast i32 addrspace(1)* [[GLOBAL:%.*]] to i32*
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL:%.*]] to ptr
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT: [[GLOBAL_1:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[GLOBAL]], i64 [[ADD0]]
; CHECK-NEXT: [[FLAT_1:%.*]] = getelementptr inbounds i32, i32* [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT: [[LOAD_GLOBAL_0:%.*]] = load i32, i32 addrspace(1)* [[GLOBAL]], align 4
; CHECK-NEXT: [[LOAD_GLOBAL_1:%.*]] = load i32, i32 addrspace(1)* [[GLOBAL_1]], align 4
; CHECK-NEXT: [[LOAD_FLAT_0:%.*]] = load i32, i32* [[FLAT]], align 4
; CHECK-NEXT: [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
; CHECK-NEXT: [[GLOBAL_1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[GLOBAL]], i64 [[ADD0]]
; CHECK-NEXT: [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT: [[LOAD_GLOBAL_0:%.*]] = load i32, ptr addrspace(1) [[GLOBAL]], align 4
; CHECK-NEXT: [[LOAD_GLOBAL_1:%.*]] = load i32, ptr addrspace(1) [[GLOBAL_1]], align 4
; CHECK-NEXT: [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT: [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT: [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_GLOBAL_0]]
; CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_GLOBAL_1]]
; CHECK-NEXT: store i32 [[SUB0]], i32* undef, align 4
; CHECK-NEXT: store i32 [[SUB1]], i32* undef, align 4
; CHECK-NEXT: store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT: store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT: ret void
;
bb:
%flat = addrspacecast i32 addrspace(1)* %global to i32*
%flat = addrspacecast ptr addrspace(1) %global to ptr
%add0 = add i64 %idx0, 2
%add1 = add i64 %idx1, 1

%global.1 = getelementptr inbounds i32, i32 addrspace(1)* %global, i64 %add0
%flat.1 = getelementptr inbounds i32, i32* %flat, i64 %add1
%global.1 = getelementptr inbounds i32, ptr addrspace(1) %global, i64 %add0
%flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

%load.global.0 = load i32, i32 addrspace(1)* %global, align 4
%load.global.1 = load i32, i32 addrspace(1)* %global.1, align 4
%load.global.0 = load i32, ptr addrspace(1) %global, align 4
%load.global.1 = load i32, ptr addrspace(1) %global.1, align 4

%load.flat.0 = load i32, i32* %flat, align 4
%load.flat.1 = load i32, i32* %flat.1, align 4
%load.flat.0 = load i32, ptr %flat, align 4
%load.flat.1 = load i32, ptr %flat.1, align 4

%sub0 = sub i32 %load.flat.0, %load.global.0
%sub1 = sub i32 %load.flat.1, %load.global.1

store i32 %sub0, i32* undef
store i32 %sub1, i32* undef
store i32 %sub0, ptr undef
store i32 %sub1, ptr undef
ret void
}

; This should vectorize if using getUnderlyingObject
; The add is done in the same width, even though the address space size is smaller
define void @multi_as_reduction_different_sized_noncanon(i32 addrspace(3)* %lds, i64 %idx0, i64 %idx1) #0 {
define void @multi_as_reduction_different_sized_noncanon(ptr addrspace(3) %lds, i64 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_different_sized_noncanon(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast i32 addrspace(3)* [[LDS:%.*]] to i32*
; CHECK-NEXT: [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
; CHECK-NEXT: [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT: [[LDS_1:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* [[LDS]], i64 [[ADD0]]
; CHECK-NEXT: [[FLAT_1:%.*]] = getelementptr inbounds i32, i32* [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT: [[LOAD_LDS_0:%.*]] = load i32, i32 addrspace(3)* [[LDS]], align 4
; CHECK-NEXT: [[LOAD_LDS_1:%.*]] = load i32, i32 addrspace(3)* [[LDS_1]], align 4
; CHECK-NEXT: [[LOAD_FLAT_0:%.*]] = load i32, i32* [[FLAT]], align 4
; CHECK-NEXT: [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
; CHECK-NEXT: [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i64 [[ADD0]]
; CHECK-NEXT: [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT: [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
; CHECK-NEXT: [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
; CHECK-NEXT: [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT: [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT: [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
; CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
; CHECK-NEXT: store i32 [[SUB0]], i32* undef, align 4
; CHECK-NEXT: store i32 [[SUB1]], i32* undef, align 4
; CHECK-NEXT: store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT: store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT: ret void
;
bb:
%flat = addrspacecast i32 addrspace(3)* %lds to i32*
%flat = addrspacecast ptr addrspace(3) %lds to ptr
%add0 = add i64 %idx0, 2
%add1 = add i64 %idx1, 1

%lds.1 = getelementptr inbounds i32, i32 addrspace(3)* %lds, i64 %add0
%flat.1 = getelementptr inbounds i32, i32* %flat, i64 %add1
%lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i64 %add0
%flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

%load.lds.0 = load i32, i32 addrspace(3)* %lds, align 4
%load.lds.1 = load i32, i32 addrspace(3)* %lds.1, align 4
%load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
%load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4

%load.flat.0 = load i32, i32* %flat, align 4
%load.flat.1 = load i32, i32* %flat.1, align 4
%load.flat.0 = load i32, ptr %flat, align 4
%load.flat.1 = load i32, ptr %flat.1, align 4

%sub0 = sub i32 %load.flat.0, %load.lds.0
%sub1 = sub i32 %load.flat.1, %load.lds.1

store i32 %sub0, i32* undef
store i32 %sub1, i32* undef
store i32 %sub0, ptr undef
store i32 %sub1, ptr undef
ret void
}
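
The "noncanon" in this test's name refers to the LDS index width: p3 is declared as a 32-bit address space in this datalayout, so the canonical GEP index type for addrspace(3) is i32 (as @multi_as_reduction_different_sized uses above), while here the add and the index stay in i64. The two spellings side by side (sketch):

%lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 %add0   ; canonical i32 index for p3
%lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i64 %add0   ; non-canonical i64 index, as used here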

define void @slp_crash_on_addrspacecast() {
; CHECK-LABEL: @slp_crash_on_addrspacecast(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
; CHECK-NEXT: [[P0:%.*]] = addrspacecast i64 addrspace(3)* [[TMP0]] to i64*
; CHECK-NEXT: store i64 undef, i64* [[P0]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
; CHECK-NEXT: [[P1:%.*]] = addrspacecast i64 addrspace(3)* [[TMP1]] to i64*
; CHECK-NEXT: store i64 undef, i64* [[P1]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
; CHECK-NEXT: [[P0:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
; CHECK-NEXT: store i64 undef, ptr [[P0]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
; CHECK-NEXT: [[P1:%.*]] = addrspacecast ptr addrspace(3) [[TMP1]] to ptr
; CHECK-NEXT: store i64 undef, ptr [[P1]], align 8
; CHECK-NEXT: ret void
;
entry:
%0 = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
%p0 = addrspacecast i64 addrspace(3)* %0 to i64*
store i64 undef, i64* %p0, align 8
%1 = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
%p1 = addrspacecast i64 addrspace(3)* %1 to i64*
store i64 undef, i64* %p1, align 8
%0 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
%p0 = addrspacecast ptr addrspace(3) %0 to ptr
store i64 undef, ptr %p0, align 8
%1 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
%p1 = addrspacecast ptr addrspace(3) %1 to ptr
store i64 undef, ptr %p1, align 8
ret void
}
110 changes: 55 additions & 55 deletions llvm/test/Transforms/SLPVectorizer/AMDGPU/horizontal-store.ll

Large diffs are not rendered by default.

276 changes: 125 additions & 151 deletions llvm/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll

Large diffs are not rendered by default.

42 changes: 21 additions & 21 deletions llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f32.ll
@@ -5,59 +5,59 @@
; GFX908: fadd float
; GFX908: fadd float
; GFX90A: fadd <2 x float>
define amdgpu_kernel void @fadd_combine(float addrspace(1)* %arg) {
define amdgpu_kernel void @fadd_combine(ptr addrspace(1) %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp1
%tmp3 = load float, float addrspace(1)* %tmp2, align 4
%tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp1
%tmp3 = load float, ptr addrspace(1) %tmp2, align 4
%tmp4 = fadd float %tmp3, 1.000000e+00
store float %tmp4, float addrspace(1)* %tmp2, align 4
store float %tmp4, ptr addrspace(1) %tmp2, align 4
%tmp5 = add nuw nsw i64 %tmp1, 1
%tmp6 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp5
%tmp7 = load float, float addrspace(1)* %tmp6, align 4
%tmp6 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp5
%tmp7 = load float, ptr addrspace(1) %tmp6, align 4
%tmp8 = fadd float %tmp7, 1.000000e+00
store float %tmp8, float addrspace(1)* %tmp6, align 4
store float %tmp8, ptr addrspace(1) %tmp6, align 4
ret void
}

; GCN-LABEL: @fmul_combine
; GFX908: fmul float
; GFX908: fmul float
; GFX90A: fmul <2 x float>
define amdgpu_kernel void @fmul_combine(float addrspace(1)* %arg) {
define amdgpu_kernel void @fmul_combine(ptr addrspace(1) %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp1
%tmp3 = load float, float addrspace(1)* %tmp2, align 4
%tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp1
%tmp3 = load float, ptr addrspace(1) %tmp2, align 4
%tmp4 = fmul float %tmp3, 1.000000e+00
store float %tmp4, float addrspace(1)* %tmp2, align 4
store float %tmp4, ptr addrspace(1) %tmp2, align 4
%tmp5 = add nuw nsw i64 %tmp1, 1
%tmp6 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp5
%tmp7 = load float, float addrspace(1)* %tmp6, align 4
%tmp6 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp5
%tmp7 = load float, ptr addrspace(1) %tmp6, align 4
%tmp8 = fmul float %tmp7, 1.000000e+00
store float %tmp8, float addrspace(1)* %tmp6, align 4
store float %tmp8, ptr addrspace(1) %tmp6, align 4
ret void
}

; GCN-LABEL: @fma_combine
; GFX908: call float @llvm.fma.f32
; GFX908: call float @llvm.fma.f32
; GFX90A: call <2 x float> @llvm.fma.v2f32
define amdgpu_kernel void @fma_combine(float addrspace(1)* %arg) {
define amdgpu_kernel void @fma_combine(ptr addrspace(1) %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
%tmp2 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp1
%tmp3 = load float, float addrspace(1)* %tmp2, align 4
%tmp2 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp1
%tmp3 = load float, ptr addrspace(1) %tmp2, align 4
%tmp4 = tail call float @llvm.fma.f32(float %tmp3, float 1.000000e+00, float 1.000000e+00)
store float %tmp4, float addrspace(1)* %tmp2, align 4
store float %tmp4, ptr addrspace(1) %tmp2, align 4
%tmp5 = add nuw nsw i64 %tmp1, 1
%tmp6 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp5
%tmp7 = load float, float addrspace(1)* %tmp6, align 4
%tmp6 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp5
%tmp7 = load float, ptr addrspace(1) %tmp6, align 4
%tmp8 = tail call float @llvm.fma.f32(float %tmp7, float 1.000000e+00, float 1.000000e+00)
store float %tmp8, float addrspace(1)* %tmp6, align 4
store float %tmp8, ptr addrspace(1) %tmp6, align 4
ret void
}
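
These tests encode the same target split: GFX90A added packed 32-bit float math, so SLP profitably forms <2 x float> operations there, while GFX908 lacks it and the adjacent scalars stay separate. A sketch of the vector shape the GFX90A checks expect (assuming the two adjacent elements load as one pair):

%v = load <2 x float>, ptr addrspace(1) %tmp2, align 4
%r = fadd <2 x float> %v, <float 1.000000e+00, float 1.000000e+00>
store <2 x float> %r, ptr addrspace(1) %tmp2, align 4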

26 changes: 13 additions & 13 deletions llvm/test/Transforms/SLPVectorizer/ARM/memory.ll
@@ -6,23 +6,23 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; On Swift, unaligned <2 x double> stores need 4 uops, and it is therefore cheaper
; to do this scalar.

define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) {
define void @expensive_double_store(ptr noalias %dst, ptr noalias %src, i64 %count) {
; CHECK-LABEL: @expensive_double_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[SRC:%.*]], align 8
; CHECK-NEXT: store double [[TMP0]], double* [[DST:%.*]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[SRC]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[DST]], i64 1
; CHECK-NEXT: store double [[TMP1]], double* [[ARRAYIDX3]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[SRC:%.*]], align 8
; CHECK-NEXT: store double [[TMP0]], ptr [[DST:%.*]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 1
; CHECK-NEXT: store double [[TMP1]], ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT: ret void
;
entry:
%0 = load double, double* %src, align 8
store double %0, double* %dst, align 8
%arrayidx2 = getelementptr inbounds double, double* %src, i64 1
%1 = load double, double* %arrayidx2, align 8
%arrayidx3 = getelementptr inbounds double, double* %dst, i64 1
store double %1, double* %arrayidx3, align 8
%0 = load double, ptr %src, align 8
store double %0, ptr %dst, align 8
%arrayidx2 = getelementptr inbounds double, ptr %src, i64 1
%1 = load double, ptr %arrayidx2, align 8
%arrayidx3 = getelementptr inbounds double, ptr %dst, i64 1
store double %1, ptr %arrayidx3, align 8
ret void
}
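
For reference, the vector form the cost model rejects here would be roughly (an illustrative sketch):

%v = load <2 x double>, ptr %src, align 8
store <2 x double> %v, ptr %dst, align 8

With only 8-byte alignment this is exactly the unaligned <2 x double> store the comment above describes, so the two scalar double copies are the cheaper lowering on Swift.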
50 changes: 21 additions & 29 deletions llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
@@ -2,22 +2,18 @@
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_40 | FileCheck %s -check-prefix=NOVECTOR

define void @fusion(i8* noalias nocapture align 256 dereferenceable(19267584) %arg, i8* noalias nocapture readonly align 256 dereferenceable(19267584) %arg1, i32 %arg2, i32 %arg3) local_unnamed_addr #0 {
define void @fusion(ptr noalias nocapture align 256 dereferenceable(19267584) %arg, ptr noalias nocapture readonly align 256 dereferenceable(19267584) %arg1, i32 %arg2, i32 %arg3) local_unnamed_addr #0 {
; CHECK-LABEL: @fusion(
; CHECK-NEXT: [[TMP:%.*]] = shl nuw nsw i32 [[ARG2:%.*]], 6
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP]], [[ARG3:%.*]]
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[ARG1:%.*]] to half*
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[ARG:%.*]] to half*
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[TMP11]] to <2 x half>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> [[TMP2]], <half 0xH5380, half 0xH5380>
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x half> [[TMP3]], <half 0xH57F0, half 0xH57F0>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast half* [[TMP16]] to <2 x half>*
; CHECK-NEXT: store <2 x half> [[TMP4]], <2 x half>* [[TMP5]], align 8
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr [[TMP16]], align 8
; CHECK-NEXT: ret void
;
; NOVECTOR-LABEL: @fusion(
@@ -26,41 +22,37 @@ define void @fusion(i8* noalias nocapture align 256 dereferenceable(19267584) %a
; NOVECTOR-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 2
; NOVECTOR-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
; NOVECTOR-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1
; NOVECTOR-NEXT: [[TMP10:%.*]] = bitcast i8* [[ARG1:%.*]] to half*
; NOVECTOR-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP6]]
; NOVECTOR-NEXT: [[TMP12:%.*]] = load half, half* [[TMP11]], align 8
; NOVECTOR-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]]
; NOVECTOR-NEXT: [[TMP12:%.*]] = load half, ptr [[TMP11]], align 8
; NOVECTOR-NEXT: [[TMP13:%.*]] = fmul fast half [[TMP12]], 0xH5380
; NOVECTOR-NEXT: [[TMP14:%.*]] = fadd fast half [[TMP13]], 0xH57F0
; NOVECTOR-NEXT: [[TMP15:%.*]] = bitcast i8* [[ARG:%.*]] to half*
; NOVECTOR-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP6]]
; NOVECTOR-NEXT: store half [[TMP14]], half* [[TMP16]], align 8
; NOVECTOR-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP7]]
; NOVECTOR-NEXT: [[TMP18:%.*]] = load half, half* [[TMP17]], align 2
; NOVECTOR-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]]
; NOVECTOR-NEXT: store half [[TMP14]], ptr [[TMP16]], align 8
; NOVECTOR-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, ptr [[ARG1]], i64 [[TMP7]]
; NOVECTOR-NEXT: [[TMP18:%.*]] = load half, ptr [[TMP17]], align 2
; NOVECTOR-NEXT: [[TMP19:%.*]] = fmul fast half [[TMP18]], 0xH5380
; NOVECTOR-NEXT: [[TMP20:%.*]] = fadd fast half [[TMP19]], 0xH57F0
; NOVECTOR-NEXT: [[TMP21:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP7]]
; NOVECTOR-NEXT: store half [[TMP20]], half* [[TMP21]], align 2
; NOVECTOR-NEXT: [[TMP21:%.*]] = getelementptr inbounds half, ptr [[ARG]], i64 [[TMP7]]
; NOVECTOR-NEXT: store half [[TMP20]], ptr [[TMP21]], align 2
; NOVECTOR-NEXT: ret void
;
%tmp = shl nuw nsw i32 %arg2, 6
%tmp4 = or i32 %tmp, %arg3
%tmp5 = shl nuw nsw i32 %tmp4, 2
%tmp6 = zext i32 %tmp5 to i64
%tmp7 = or i64 %tmp6, 1
%tmp10 = bitcast i8* %arg1 to half*
%tmp11 = getelementptr inbounds half, half* %tmp10, i64 %tmp6
%tmp12 = load half, half* %tmp11, align 8
%tmp11 = getelementptr inbounds half, ptr %arg1, i64 %tmp6
%tmp12 = load half, ptr %tmp11, align 8
%tmp13 = fmul fast half %tmp12, 0xH5380
%tmp14 = fadd fast half %tmp13, 0xH57F0
%tmp15 = bitcast i8* %arg to half*
%tmp16 = getelementptr inbounds half, half* %tmp15, i64 %tmp6
store half %tmp14, half* %tmp16, align 8
%tmp17 = getelementptr inbounds half, half* %tmp10, i64 %tmp7
%tmp18 = load half, half* %tmp17, align 2
%tmp16 = getelementptr inbounds half, ptr %arg, i64 %tmp6
store half %tmp14, ptr %tmp16, align 8
%tmp17 = getelementptr inbounds half, ptr %arg1, i64 %tmp7
%tmp18 = load half, ptr %tmp17, align 2
%tmp19 = fmul fast half %tmp18, 0xH5380
%tmp20 = fadd fast half %tmp19, 0xH57F0
%tmp21 = getelementptr inbounds half, half* %tmp15, i64 %tmp7
store half %tmp20, half* %tmp21, align 2
%tmp21 = getelementptr inbounds half, ptr %arg, i64 %tmp7
store half %tmp20, ptr %tmp21, align 2
ret void
}

10 changes: 5 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
@@ -1,22 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr9 -mattr=+vsx -passes=slp-vectorizer < %s | FileCheck %s

%struct.S = type { i8*, i8* }
%struct.S = type { ptr, ptr }

@kS0 = common global %struct.S zeroinitializer, align 8

define { i64, i64 } @getS() {
; CHECK-LABEL: @getS(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @kS0, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @kS0, i64 0, i32 1), align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP0]], 0
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i64, i64 } [[TMP2]], i64 [[TMP1]], 1
; CHECK-NEXT: ret { i64, i64 } [[TMP3]]
;
entry:
%0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
%1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
%0 = load i64, ptr @kS0, align 8
%1 = load i64, ptr getelementptr inbounds (%struct.S, ptr @kS0, i64 0, i32 1), align 8
%2 = insertvalue { i64, i64 } undef, i64 %0, 0
%3 = insertvalue { i64, i64 } %2, i64 %1, 1
ret { i64, i64 } %3
34 changes: 15 additions & 19 deletions llvm/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
@@ -1,42 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -passes=slp-vectorizer < %s | FileCheck %s

%struct.A = type { i8*, i8* }
%struct.A = type { ptr, ptr }

define i64 @foo(%struct.A* nocapture readonly %this) {
define i64 @foo(ptr nocapture readonly %this) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[END_I:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], %struct.A* [[THIS:%.*]], i64 0, i32 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8** [[END_I]] to i64*
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.A* [[THIS]] to i64*
; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8
; CHECK-NEXT: [[END_I:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[THIS:%.*]], i64 0, i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[END_I]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[THIS]], align 8
; CHECK-NEXT: [[SUB_PTR_SUB_I:%.*]] = sub i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[SUB_PTR_SUB_I]], 9
; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]]
; CHECK: lor.lhs.false:
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i8*
; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP1]] to i8*
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i8* [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP1]] to ptr
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt ptr [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP2]], i64 2, i64 -1
; CHECK-NEXT: ret i64 [[DOT]]
; CHECK: return:
; CHECK-NEXT: ret i64 2
;
entry:
%end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1
%0 = bitcast i8** %end.i to i64*
%1 = load i64, i64* %0, align 8
%2 = bitcast %struct.A* %this to i64*
%3 = load i64, i64* %2, align 8
%sub.ptr.sub.i = sub i64 %1, %3
%end.i = getelementptr inbounds %struct.A, ptr %this, i64 0, i32 1
%0 = load i64, ptr %end.i, align 8
%1 = load i64, ptr %this, align 8
%sub.ptr.sub.i = sub i64 %0, %1
%cmp = icmp sgt i64 %sub.ptr.sub.i, 9
br i1 %cmp, label %return, label %lor.lhs.false

lor.lhs.false:
%4 = inttoptr i64 %3 to i8*
%5 = inttoptr i64 %1 to i8*
%cmp2 = icmp ugt i8* %5, %4
%2 = inttoptr i64 %1 to ptr
%3 = inttoptr i64 %0 to ptr
%cmp2 = icmp ugt ptr %3, %2
%. = select i1 %cmp2, i64 2, i64 -1
ret i64 %.

10 changes: 5 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/PowerPC/short-to-double.ll
@@ -4,7 +4,7 @@
%struct._pp = type { i16, i16, i16, i16 }

; Function Attrs: norecurse nounwind readonly
define [5 x double] @foo(double %k, i64 %n, %struct._pp* nocapture readonly %p) local_unnamed_addr #0 {
define [5 x double] @foo(double %k, i64 %n, ptr nocapture readonly %p) local_unnamed_addr #0 {
entry:
%cmp17 = icmp sgt i64 %n, 0
br i1 %cmp17, label %for.body, label %for.cond.cleanup
@@ -20,13 +20,13 @@ for.body: ; preds = %entry, %for.body
%i.020 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%retval.sroa.4.019 = phi double [ %add10, %for.body ], [ 0.000000e+00, %entry ]
%retval.sroa.0.018 = phi double [ %add, %for.body ], [ 0.000000e+00, %entry ]
%r1 = getelementptr inbounds %struct._pp, %struct._pp* %p, i64 %i.020, i32 2
%0 = load i16, i16* %r1, align 2
%r1 = getelementptr inbounds %struct._pp, ptr %p, i64 %i.020, i32 2
%0 = load i16, ptr %r1, align 2
%conv2 = uitofp i16 %0 to double
%mul = fmul double %conv2, %k
%add = fadd double %retval.sroa.0.018, %mul
%g5 = getelementptr inbounds %struct._pp, %struct._pp* %p, i64 %i.020, i32 1
%1 = load i16, i16* %g5, align 2
%g5 = getelementptr inbounds %struct._pp, ptr %p, i64 %i.020, i32 1
%1 = load i16, ptr %g5, align 2
%conv7 = uitofp i16 %1 to double
%mul8 = fmul double %conv7, %k
%add10 = fadd double %retval.sroa.4.019, %mul8
54 changes: 26 additions & 28 deletions llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
@@ -9,54 +9,52 @@
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

define void @foo(i64* nocapture writeonly %da) {
define void @foo(ptr nocapture writeonly %da) {
; CHECK-128-LABEL: @foo(
; CHECK-128-NEXT: entry:
; CHECK-128-NEXT: store i64 0, i64* [[DA:%.*]], align 8
; CHECK-128-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 1
; CHECK-128-NEXT: store i64 0, i64* [[ARRAYIDX1]], align 8
; CHECK-128-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 2
; CHECK-128-NEXT: store i64 0, i64* [[ARRAYIDX2]], align 8
; CHECK-128-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 3
; CHECK-128-NEXT: store i64 0, i64* [[ARRAYIDX3]], align 8
; CHECK-128-NEXT: store i64 0, ptr [[DA:%.*]], align 8
; CHECK-128-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 1
; CHECK-128-NEXT: store i64 0, ptr [[ARRAYIDX1]], align 8
; CHECK-128-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 2
; CHECK-128-NEXT: store i64 0, ptr [[ARRAYIDX2]], align 8
; CHECK-128-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 3
; CHECK-128-NEXT: store i64 0, ptr [[ARRAYIDX3]], align 8
; CHECK-128-NEXT: ret void
;
; CHECK-256-LABEL: @foo(
; CHECK-256-NEXT: entry:
; CHECK-256-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>*
; CHECK-256-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* [[TMP0]], align 8
; CHECK-256-NEXT: store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8
; CHECK-256-NEXT: ret void
;
; CHECK-512-LABEL: @foo(
; CHECK-512-NEXT: entry:
; CHECK-512-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <4 x i64>*
; CHECK-512-NEXT: store <4 x i64> zeroinitializer, <4 x i64>* [[TMP0]], align 8
; CHECK-512-NEXT: store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8
; CHECK-512-NEXT: ret void
;
entry:
store i64 0, i64* %da, align 8
%arrayidx1 = getelementptr inbounds i64, i64* %da, i64 1
store i64 0, i64* %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds i64, i64* %da, i64 2
store i64 0, i64* %arrayidx2, align 8
%arrayidx3 = getelementptr inbounds i64, i64* %da, i64 3
store i64 0, i64* %arrayidx3, align 8
store i64 0, ptr %da, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %da, i64 1
store i64 0, ptr %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds i64, ptr %da, i64 2
store i64 0, ptr %arrayidx2, align 8
%arrayidx3 = getelementptr inbounds i64, ptr %da, i64 3
store i64 0, ptr %arrayidx3, align 8
ret void
}

define void @foo8(i8* nocapture writeonly %da) {
define void @foo8(ptr nocapture writeonly %da) {
; CHECK-LABEL: @foo8(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i8 0, i8* [[DA:%.*]], align 8
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 1
; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 2
; CHECK-NEXT: store i8 0, ptr [[DA:%.*]], align 8
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DA]], i8 1
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[DA]], i8 2
; CHECK-NEXT: ret void
;
entry:
store i8 0, i8* %da, align 8
%arrayidx1 = getelementptr inbounds i8, i8* %da, i8 1
store i8 0, i8* %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds i8, i8* %da, i8 2
store i8 0, ptr %da, align 8
%arrayidx1 = getelementptr inbounds i8, ptr %da, i8 1
store i8 0, ptr %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds i8, ptr %da, i8 2
ret void
}
10 changes: 5 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll
@@ -6,7 +6,7 @@
; node.

; Function Attrs: norecurse nounwind readonly
define void @fun(i8* nocapture, i32 zeroext) local_unnamed_addr #0 {
define void @fun(ptr nocapture, i32 zeroext) local_unnamed_addr #0 {
.lr.ph.preheader:
br label %.lr.ph

@@ -20,11 +20,11 @@ define void @fun(i8* nocapture, i32 zeroext) local_unnamed_addr #0 {
%7 = select i1 %6, i32 0, i32 %1
%.9 = sub i32 %3, %7
%8 = zext i32 %. to i64
%9 = getelementptr inbounds i8, i8* %0, i64 %8
%10 = load i8, i8* %9, align 1
%9 = getelementptr inbounds i8, ptr %0, i64 %8
%10 = load i8, ptr %9, align 1
%11 = zext i32 %.9 to i64
%12 = getelementptr inbounds i8, i8* %0, i64 %11
%13 = load i8, i8* %12, align 1
%12 = getelementptr inbounds i8, ptr %0, i64 %11
%13 = load i8, ptr %12, align 1
%14 = icmp eq i8 %10, %13
br i1 %14, label %.lr.ph, label %._crit_edge

Expand Down
35 changes: 17 additions & 18 deletions llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,40 @@ define void @foo() local_unnamed_addr {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD277:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: store i32 [[ADD277]], i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2) to <2 x i32>*), align 4
; CHECK-NEXT: store i32 [[ADD277]], ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 1), align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 0), align 4
; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[ARRAYIDX372]], align 4
; CHECK-NEXT: unreachable
;
entry:
%add277 = add nsw i32 undef, undef
store i32 %add277, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
%0 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
store i32 %add277, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 1), align 4
%0 = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 0), align 4
%sub355 = add nsw i32 undef, %0
%shr.i = ashr i32 %sub355, 6
%arrayidx372 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
store i32 %shr.i, i32* %arrayidx372, align 4
%arrayidx372 = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0
store i32 %shr.i, ptr %arrayidx372, align 4
%sub355.1 = add nsw i32 undef, %add277
%shr.i.1 = ashr i32 %sub355.1, 6
%arrayidx372.1 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
store i32 %shr.i.1, i32* %arrayidx372.1, align 4
%1 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4
%arrayidx372.1 = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 1
store i32 %shr.i.1, ptr %arrayidx372.1, align 4
%1 = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4
%sub355.2 = add nsw i32 undef, %1
%shr.i.2 = ashr i32 %sub355.2, 6
%arrayidx372.2 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
store i32 %shr.i.2, i32* %arrayidx372.2, align 4
%2 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4
%arrayidx372.2 = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 2
store i32 %shr.i.2, ptr %arrayidx372.2, align 4
%2 = load i32, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 3), align 4
%sub355.3 = add nsw i32 undef, %2
%shr.i.3 = ashr i32 %sub355.3, 6
%arrayidx372.3 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
store i32 %shr.i.3, i32* %arrayidx372.3, align 4
%arrayidx372.3 = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 3
store i32 %shr.i.3, ptr %arrayidx372.3, align 4
unreachable
}
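
The pr34619.ll hunks apply the same rule to constant expressions: a getelementptr constant keeps its source element type as an explicit first argument, but its pointer operand shrinks from [4 x [4 x i32]]* to a bare ptr, and the bitcast constant that used to retype the address for the <2 x i32> load is simply dropped. A hedged standalone sketch (the global is declared external here purely for illustration):

@bar2 = external global [4 x [4 x i32]]

; The constant GEP still names the aggregate type it indexes, but the
; address it produces is an untyped ptr that any load may use directly.
define <2 x i32> @load_pair_of_bar2() {
  %v = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar2, i64 0, i64 3, i64 2), align 4
  ret <2 x i32> %v
}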
126 changes: 63 additions & 63 deletions llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,69 +9,69 @@
; VE-NOT: x double
; SSE: x double

define void @foo(double* noalias %A0p, double* noalias %B0p) {
define void @foo(ptr noalias %A0p, ptr noalias %B0p) {
entry:
%A1p = getelementptr inbounds double, double* %A0p, i64 1
%A2p = getelementptr inbounds double, double* %A0p, i64 2
%A3p = getelementptr inbounds double, double* %A0p, i64 3
%A4p = getelementptr inbounds double, double* %A0p, i64 4
%A5p = getelementptr inbounds double, double* %A0p, i64 5
%A6p = getelementptr inbounds double, double* %A0p, i64 6
%A7p = getelementptr inbounds double, double* %A0p, i64 7
%A8p = getelementptr inbounds double, double* %A0p, i64 8
%A9p = getelementptr inbounds double, double* %A0p, i64 9
%A10p = getelementptr inbounds double, double* %A0p, i64 10
%A11p = getelementptr inbounds double, double* %A0p, i64 11
%A12p = getelementptr inbounds double, double* %A0p, i64 12
%A13p = getelementptr inbounds double, double* %A0p, i64 13
%A14p = getelementptr inbounds double, double* %A0p, i64 14
%A15p = getelementptr inbounds double, double* %A0p, i64 15
%A0 = load double, double* %A0p, align 8
%A1 = load double, double* %A1p, align 8
%A2 = load double, double* %A2p, align 8
%A3 = load double, double* %A3p, align 8
%A4 = load double, double* %A4p, align 8
%A5 = load double, double* %A5p, align 8
%A6 = load double, double* %A6p, align 8
%A7 = load double, double* %A7p, align 8
%A8 = load double, double* %A8p, align 8
%A9 = load double, double* %A9p, align 8
%A10 = load double, double* %A10p, align 8
%A11 = load double, double* %A11p, align 8
%A12 = load double, double* %A12p, align 8
%A13 = load double, double* %A13p, align 8
%A14 = load double, double* %A14p, align 8
%A15 = load double, double* %A15p, align 8
%B1p = getelementptr inbounds double, double* %B0p, i64 1
%B2p = getelementptr inbounds double, double* %B0p, i64 2
%B3p = getelementptr inbounds double, double* %B0p, i64 3
%B4p = getelementptr inbounds double, double* %B0p, i64 4
%B5p = getelementptr inbounds double, double* %B0p, i64 5
%B6p = getelementptr inbounds double, double* %B0p, i64 6
%B7p = getelementptr inbounds double, double* %B0p, i64 7
%B8p = getelementptr inbounds double, double* %B0p, i64 8
%B9p = getelementptr inbounds double, double* %B0p, i64 9
%B10p = getelementptr inbounds double, double* %B0p, i64 10
%B11p = getelementptr inbounds double, double* %B0p, i64 11
%B12p = getelementptr inbounds double, double* %B0p, i64 12
%B13p = getelementptr inbounds double, double* %B0p, i64 13
%B14p = getelementptr inbounds double, double* %B0p, i64 14
%B15p = getelementptr inbounds double, double* %B0p, i64 15
store double %A0, double* %B0p, align 8
store double %A1, double* %B1p, align 8
store double %A2, double* %B2p, align 8
store double %A3, double* %B3p, align 8
store double %A4, double* %B4p, align 8
store double %A5, double* %B5p, align 8
store double %A6, double* %B6p, align 8
store double %A7, double* %B7p, align 8
store double %A8, double* %B8p, align 8
store double %A9, double* %B9p, align 8
store double %A10, double* %B10p, align 8
store double %A11, double* %B11p, align 8
store double %A12, double* %B12p, align 8
store double %A13, double* %B13p, align 8
store double %A14, double* %B14p, align 8
store double %A15, double* %B15p, align 8
%A1p = getelementptr inbounds double, ptr %A0p, i64 1
%A2p = getelementptr inbounds double, ptr %A0p, i64 2
%A3p = getelementptr inbounds double, ptr %A0p, i64 3
%A4p = getelementptr inbounds double, ptr %A0p, i64 4
%A5p = getelementptr inbounds double, ptr %A0p, i64 5
%A6p = getelementptr inbounds double, ptr %A0p, i64 6
%A7p = getelementptr inbounds double, ptr %A0p, i64 7
%A8p = getelementptr inbounds double, ptr %A0p, i64 8
%A9p = getelementptr inbounds double, ptr %A0p, i64 9
%A10p = getelementptr inbounds double, ptr %A0p, i64 10
%A11p = getelementptr inbounds double, ptr %A0p, i64 11
%A12p = getelementptr inbounds double, ptr %A0p, i64 12
%A13p = getelementptr inbounds double, ptr %A0p, i64 13
%A14p = getelementptr inbounds double, ptr %A0p, i64 14
%A15p = getelementptr inbounds double, ptr %A0p, i64 15
%A0 = load double, ptr %A0p, align 8
%A1 = load double, ptr %A1p, align 8
%A2 = load double, ptr %A2p, align 8
%A3 = load double, ptr %A3p, align 8
%A4 = load double, ptr %A4p, align 8
%A5 = load double, ptr %A5p, align 8
%A6 = load double, ptr %A6p, align 8
%A7 = load double, ptr %A7p, align 8
%A8 = load double, ptr %A8p, align 8
%A9 = load double, ptr %A9p, align 8
%A10 = load double, ptr %A10p, align 8
%A11 = load double, ptr %A11p, align 8
%A12 = load double, ptr %A12p, align 8
%A13 = load double, ptr %A13p, align 8
%A14 = load double, ptr %A14p, align 8
%A15 = load double, ptr %A15p, align 8
%B1p = getelementptr inbounds double, ptr %B0p, i64 1
%B2p = getelementptr inbounds double, ptr %B0p, i64 2
%B3p = getelementptr inbounds double, ptr %B0p, i64 3
%B4p = getelementptr inbounds double, ptr %B0p, i64 4
%B5p = getelementptr inbounds double, ptr %B0p, i64 5
%B6p = getelementptr inbounds double, ptr %B0p, i64 6
%B7p = getelementptr inbounds double, ptr %B0p, i64 7
%B8p = getelementptr inbounds double, ptr %B0p, i64 8
%B9p = getelementptr inbounds double, ptr %B0p, i64 9
%B10p = getelementptr inbounds double, ptr %B0p, i64 10
%B11p = getelementptr inbounds double, ptr %B0p, i64 11
%B12p = getelementptr inbounds double, ptr %B0p, i64 12
%B13p = getelementptr inbounds double, ptr %B0p, i64 13
%B14p = getelementptr inbounds double, ptr %B0p, i64 14
%B15p = getelementptr inbounds double, ptr %B0p, i64 15
store double %A0, ptr %B0p, align 8
store double %A1, ptr %B1p, align 8
store double %A2, ptr %B2p, align 8
store double %A3, ptr %B3p, align 8
store double %A4, ptr %B4p, align 8
store double %A5, ptr %B5p, align 8
store double %A6, ptr %B6p, align 8
store double %A7, ptr %B7p, align 8
store double %A8, ptr %B8p, align 8
store double %A9, ptr %B9p, align 8
store double %A10, ptr %B10p, align 8
store double %A11, ptr %B11p, align 8
store double %A12, ptr %B12p, align 8
store double %A13, ptr %B13p, align 8
store double %A14, ptr %B14p, align 8
store double %A15, ptr %B15p, align 8
ret void
}
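
Every change in disable_slp.ll is this one mechanical rewrite, so it is worth stating the rule once: the pointee type leaves the pointer and survives only as the explicit type operand of getelementptr and load, and as the value type of store. A small sketch under that rule (names are illustrative):

define void @copy_second_elt(ptr %src, ptr %dst) {
  ; Element type now lives on the GEP, not on %src.
  %p = getelementptr inbounds double, ptr %src, i64 1
  ; Result type lives on the load, not on %p.
  %v = load double, ptr %p, align 8
  ; Value type lives on the store operand, not on %dst.
  store double %v, ptr %dst, align 8
  ret void
}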
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,32 @@
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

define void @foo(<2 x i64> %x, <4 x i32> %y, i64* %out) #0 {
define void @foo(<2 x i64> %x, <4 x i32> %y, ptr %out) #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[A:%.*]] = extractelement <2 x i64> [[X:%.*]], i64 0
; CHECK-NEXT: [[B:%.*]] = extractelement <4 x i32> [[Y:%.*]], i64 2
; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[B]] to i64
; CHECK-NEXT: [[C:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[A]], i64 [[A]], i64 [[CONV6]])
; CHECK-NEXT: store i64 [[C]], i64* [[OUT:%.*]], align 8
; CHECK-NEXT: store i64 [[C]], ptr [[OUT:%.*]], align 8
; CHECK-NEXT: [[D:%.*]] = extractelement <2 x i64> [[X]], i64 1
; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i32> [[Y]], i64 3
; CHECK-NEXT: [[CONV17:%.*]] = zext i32 [[E]] to i64
; CHECK-NEXT: [[F:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[D]], i64 [[D]], i64 [[CONV17]])
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[OUT]], i32 1
; CHECK-NEXT: store i64 [[F]], i64* [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[OUT]], i32 1
; CHECK-NEXT: store i64 [[F]], ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: ret void
;
%a = extractelement <2 x i64> %x, i32 0
%b = extractelement <4 x i32> %y, i32 2
%conv6 = zext i32 %b to i64
%c = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %conv6)
store i64 %c, i64* %out
store i64 %c, ptr %out
%d = extractelement <2 x i64> %x, i32 1
%e = extractelement <4 x i32> %y, i32 3
%conv17 = zext i32 %e to i64
%f = tail call i64 @llvm.fshl.i64(i64 %d, i64 %d, i64 %conv17)
%arrayidx2 = getelementptr inbounds i64, i64* %out, i32 1
store i64 %f, i64* %arrayidx2
%arrayidx2 = getelementptr inbounds i64, ptr %out, i32 1
store i64 %f, ptr %arrayidx2
ret void
}
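
Aside from the pointer rewrite, the CHECK lines above lean on a fact worth making explicit: @llvm.fshl.i64 with its first two operands equal is LLVM's spelling of a rotate-left, which is why this wasm test expects the calls to stay scalar rather than vectorize. A minimal sketch, assuming the standard funnel-shift semantics:

declare i64 @llvm.fshl.i64(i64, i64, i64)

; fshl(%x, %x, %n) shifts the 128-bit concatenation %x:%x left by %n (mod 64)
; and keeps the high half -- i.e. a 64-bit rotate left.
define i64 @rotl64(i64 %x, i64 %n) {
  %r = tail call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %n)
  ret i64 %r
}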

Expand Down
102 changes: 51 additions & 51 deletions llvm/test/Transforms/SLPVectorizer/X86/PR31847.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,29 @@
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"

@shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4
@data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4
@data = common local_unnamed_addr global [10 x ptr] zeroinitializer, align 4

define void @flat(i32 %intensity) {
; CHECK-LABEL: @flat(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
; CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @shift, align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([10 x i32], ptr @shift, i32 0, i32 1), align 4
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr @data, align 4
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr getelementptr inbounds ([10 x ptr], ptr @data, i32 0, i32 1), align 4
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, [[TMP0]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 [[SHR]]
; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 1, [[TMP1]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 [[SHR1]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[D1_DATA_046:%.*]] = phi ptr [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP4]] to i32
; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[CONV]], -128
; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1
Expand All @@ -36,22 +36,22 @@ define void @flat(i32 %intensity) {
; CHECK-NEXT: [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32 [[SUB12]]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]]
; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]]
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]]
; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[D1_DATA_046]], i32 [[IDX_NEG]]
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ADD_PTR]], align 1
; CHECK-NEXT: [[CONV15:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]]
; CHECK-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8
; CHECK-NEXT: store i8 [[CONV17]], i8* [[ADD_PTR]], align 1
; CHECK-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]]
; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1
; CHECK-NEXT: store i8 [[CONV17]], ptr [[ADD_PTR]], align 1
; CHECK-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[D1_DATA_046]], i32 [[ADD]]
; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ADD_PTR18]], align 1
; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0
; CHECK-NEXT: [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8
; CHECK-NEXT: store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1
; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: store i8 [[CONV21]], ptr [[ADD_PTR18]], align 1
; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[D1_DATA_046]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32
; CHECK-NEXT: [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128
; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32
; CHECK-NEXT: [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128
; CHECK-NEXT: [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1
Expand All @@ -62,43 +62,43 @@ define void @flat(i32 %intensity) {
; CHECK-NEXT: [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32 [[SUB4_1]], i32 [[SUB12_1]]
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]]
; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]]
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]]
; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR23]], i32 [[IDX_NEG_1]]
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32
; CHECK-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]]
; CHECK-NEXT: [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8
; CHECK-NEXT: store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]]
; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1
; CHECK-NEXT: store i8 [[CONV17_1]], ptr [[ADD_PTR_1]], align 1
; CHECK-NEXT: [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR23]], i32 [[ADD_1]]
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ADD_PTR18_1]], align 1
; CHECK-NEXT: [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0
; CHECK-NEXT: [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8
; CHECK-NEXT: store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1
; CHECK-NEXT: [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]]
; CHECK-NEXT: store i8 [[CONV21_1]], ptr [[ADD_PTR18_1]], align 1
; CHECK-NEXT: [[ADD_PTR23_1]] = getelementptr inbounds i8, ptr [[ADD_PTR23]], i32 [[TMP1]]
; CHECK-NEXT: [[INC_1]] = add nsw i32 [[Y_045]], 2
; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128
; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
;
entry:
%0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
%1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
%2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
%3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
%0 = load i32, ptr @shift, align 4
%1 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @shift, i32 0, i32 1), align 4
%2 = load ptr, ptr @data, align 4
%3 = load ptr, ptr getelementptr inbounds ([10 x ptr], ptr @data, i32 0, i32 1), align 4
%shr = lshr i32 1, %0
%arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr
%arrayidx = getelementptr inbounds i8, ptr %2, i32 %shr
%shr1 = lshr i32 1, %1
%arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1
%arrayidx2 = getelementptr inbounds i8, ptr %3, i32 %shr1
br label %for.body

for.cond.cleanup: ; preds = %for.body
ret void

for.body: ; preds = %for.body, %entry
%d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ]
%d1_data.046 = phi ptr [ %3, %entry ], [ %add.ptr23.1, %for.body ]
%y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ]
%4 = load i8, i8* %arrayidx, align 1
%4 = load i8, ptr %arrayidx, align 1
%conv = zext i8 %4 to i32
%sub = add nsw i32 %conv, -128
%5 = load i8, i8* %arrayidx2, align 1
%5 = load i8, ptr %arrayidx2, align 1
%conv3 = zext i8 %5 to i32
%sub4 = add nsw i32 %conv3, -128
%cmp5 = icmp sgt i32 %sub, -1
Expand All @@ -109,22 +109,22 @@ for.body: ; preds = %for.body, %entry
%cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12
%add = add nsw i32 %cond14, %cond
%idx.neg = sub nsw i32 0, %add
%add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg
%6 = load i8, i8* %add.ptr, align 1
%add.ptr = getelementptr inbounds i8, ptr %d1_data.046, i32 %idx.neg
%6 = load i8, ptr %add.ptr, align 1
%conv15 = zext i8 %6 to i32
%add16 = add nsw i32 %conv15, %intensity
%conv17 = trunc i32 %add16 to i8
store i8 %conv17, i8* %add.ptr, align 1
%add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add
%7 = load i8, i8* %add.ptr18, align 1
store i8 %conv17, ptr %add.ptr, align 1
%add.ptr18 = getelementptr inbounds i8, ptr %d1_data.046, i32 %add
%7 = load i8, ptr %add.ptr18, align 1
%not.tobool = icmp eq i8 %7, 0
%conv21 = zext i1 %not.tobool to i8
store i8 %conv21, i8* %add.ptr18, align 1
%add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1
%8 = load i8, i8* %arrayidx, align 1
store i8 %conv21, ptr %add.ptr18, align 1
%add.ptr23 = getelementptr inbounds i8, ptr %d1_data.046, i32 %1
%8 = load i8, ptr %arrayidx, align 1
%conv.1 = zext i8 %8 to i32
%sub.1 = add nsw i32 %conv.1, -128
%9 = load i8, i8* %arrayidx2, align 1
%9 = load i8, ptr %arrayidx2, align 1
%conv3.1 = zext i8 %9 to i32
%sub4.1 = add nsw i32 %conv3.1, -128
%cmp5.1 = icmp sgt i32 %sub.1, -1
Expand All @@ -135,18 +135,18 @@ for.body: ; preds = %for.body, %entry
%cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1
%add.1 = add nsw i32 %cond14.1, %cond.1
%idx.neg.1 = sub nsw i32 0, %add.1
%add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1
%10 = load i8, i8* %add.ptr.1, align 1
%add.ptr.1 = getelementptr inbounds i8, ptr %add.ptr23, i32 %idx.neg.1
%10 = load i8, ptr %add.ptr.1, align 1
%conv15.1 = zext i8 %10 to i32
%add16.1 = add nsw i32 %conv15.1, %intensity
%conv17.1 = trunc i32 %add16.1 to i8
store i8 %conv17.1, i8* %add.ptr.1, align 1
%add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1
%11 = load i8, i8* %add.ptr18.1, align 1
store i8 %conv17.1, ptr %add.ptr.1, align 1
%add.ptr18.1 = getelementptr inbounds i8, ptr %add.ptr23, i32 %add.1
%11 = load i8, ptr %add.ptr18.1, align 1
%not.tobool.1 = icmp eq i8 %11, 0
%conv21.1 = zext i1 %not.tobool.1 to i8
store i8 %conv21.1, i8* %add.ptr18.1, align 1
%add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1
store i8 %conv21.1, ptr %add.ptr18.1, align 1
%add.ptr23.1 = getelementptr inbounds i8, ptr %add.ptr23, i32 %1
%inc.1 = add nsw i32 %y.045, 2
%exitcond.1 = icmp eq i32 %inc.1, 128
br i1 %exitcond.1, label %for.cond.cleanup, label %for.body
Expand Down
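
One asymmetry in the PR31847.ll hunks deserves a note: a constant GEP whose indices are all zero adds nothing under opaque pointers, so it folds away and the load takes the global itself (load i32, ptr @shift), while a GEP to any nonzero element keeps its constant expression. A small sketch mirroring that pair (global name invented for illustration):

@arr = external global [10 x i32]

define i32 @elt0_vs_elt1() {
  ; All-zero indices: the GEP folds to the global, so none is written.
  %a = load i32, ptr @arr, align 4
  ; Nonzero index: the constant GEP survives the migration verbatim.
  %b = load i32, ptr getelementptr inbounds ([10 x i32], ptr @arr, i32 0, i32 1), align 4
  %s = add i32 %a, %b
  ret i32 %s
}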
102 changes: 49 additions & 53 deletions llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
Original file line number Diff line number Diff line change
@@ -1,85 +1,81 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s

define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
define void @i64_simplified(ptr noalias %st, ptr noalias %ld) {
; CHECK-LABEL: @i64_simplified(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[LD:%.*]], align 8
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], ptr [[ST:%.*]], align 8
; CHECK-NEXT: ret void
;
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
%arrayidx1 = getelementptr inbounds i64, ptr %ld, i64 1

%t0 = load i64, i64* %ld, align 8
%t1 = load i64, i64* %arrayidx1, align 8
%t0 = load i64, ptr %ld, align 8
%t1 = load i64, ptr %arrayidx1, align 8

%arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
%arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
%arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
%arrayidx3 = getelementptr inbounds i64, ptr %st, i64 1
%arrayidx4 = getelementptr inbounds i64, ptr %st, i64 2
%arrayidx5 = getelementptr inbounds i64, ptr %st, i64 3

store i64 %t0, i64* %st, align 8
store i64 %t1, i64* %arrayidx3, align 8
store i64 %t0, i64* %arrayidx4, align 8
store i64 %t1, i64* %arrayidx5, align 8
store i64 %t0, ptr %st, align 8
store i64 %t1, ptr %arrayidx3, align 8
store i64 %t0, ptr %arrayidx4, align 8
store i64 %t1, ptr %arrayidx5, align 8
ret void
}

define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
define void @i64_simplifiedi_reversed(ptr noalias %st, ptr noalias %ld) {
; CHECK-LABEL: @i64_simplifiedi_reversed(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[LD:%.*]], align 8
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
; CHECK-NEXT: store <4 x i64> [[SHUFFLE]], ptr [[ST:%.*]], align 8
; CHECK-NEXT: ret void
;
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
%arrayidx1 = getelementptr inbounds i64, ptr %ld, i64 1

%t0 = load i64, i64* %ld, align 8
%t1 = load i64, i64* %arrayidx1, align 8
%t0 = load i64, ptr %ld, align 8
%t1 = load i64, ptr %arrayidx1, align 8

%arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
%arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
%arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
%arrayidx3 = getelementptr inbounds i64, ptr %st, i64 1
%arrayidx4 = getelementptr inbounds i64, ptr %st, i64 2
%arrayidx5 = getelementptr inbounds i64, ptr %st, i64 3

store i64 %t1, i64* %st, align 8
store i64 %t0, i64* %arrayidx3, align 8
store i64 %t1, i64* %arrayidx4, align 8
store i64 %t0, i64* %arrayidx5, align 8
store i64 %t1, ptr %st, align 8
store i64 %t0, ptr %arrayidx3, align 8
store i64 %t1, ptr %arrayidx4, align 8
store i64 %t0, ptr %arrayidx5, align 8
ret void
}

define void @i64_simplifiedi_extract(i64* noalias %st, i64* noalias %ld) {
define void @i64_simplifiedi_extract(ptr noalias %st, ptr noalias %ld) {
; CHECK-LABEL: @i64_simplifiedi_extract(
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[LD]], align 8
; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
; CHECK-NEXT: store i64 [[T0]], i64* [[ST]], align 8
; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX3]], align 8
; CHECK-NEXT: store i64 [[T0]], i64* [[ARRAYIDX4]], align 8
; CHECK-NEXT: store i64 [[T1]], i64* [[ARRAYIDX5]], align 8
; CHECK-NEXT: store i64 [[T1]], i64* [[LD]], align 8
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[LD:%.*]], i64 1
; CHECK-NEXT: [[T0:%.*]] = load i64, ptr [[LD]], align 8
; CHECK-NEXT: [[T1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[ST:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, ptr [[ST]], i64 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, ptr [[ST]], i64 3
; CHECK-NEXT: store i64 [[T0]], ptr [[ST]], align 8
; CHECK-NEXT: store i64 [[T0]], ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT: store i64 [[T0]], ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: store i64 [[T1]], ptr [[ARRAYIDX5]], align 8
; CHECK-NEXT: store i64 [[T1]], ptr [[LD]], align 8
; CHECK-NEXT: ret void
;
%arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1
%arrayidx1 = getelementptr inbounds i64, ptr %ld, i64 1

%t0 = load i64, i64* %ld, align 8
%t1 = load i64, i64* %arrayidx1, align 8
%t0 = load i64, ptr %ld, align 8
%t1 = load i64, ptr %arrayidx1, align 8

%arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
%arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
%arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3
%arrayidx3 = getelementptr inbounds i64, ptr %st, i64 1
%arrayidx4 = getelementptr inbounds i64, ptr %st, i64 2
%arrayidx5 = getelementptr inbounds i64, ptr %st, i64 3

store i64 %t0, i64* %st, align 8
store i64 %t0, i64* %arrayidx3, align 8
store i64 %t0, i64* %arrayidx4, align 8
store i64 %t1, i64* %arrayidx5, align 8
store i64 %t1, i64* %ld, align 8
store i64 %t0, ptr %st, align 8
store i64 %t0, ptr %arrayidx3, align 8
store i64 %t0, ptr %arrayidx4, align 8
store i64 %t1, ptr %arrayidx5, align 8
store i64 %t1, ptr %ld, align 8
ret void
}
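
The vectorized form the PR32086.ll checks expect is itself a useful illustration: SLP loads one <2 x i64> pair and widens it with a shufflevector whose mask repeats lanes 0 and 1, covering all four stores with a single wide store. A hedged sketch of just that idiom (function name invented for illustration):

define void @broadcast_pair(ptr noalias %st, ptr noalias %ld) {
  %pair = load <2 x i64>, ptr %ld, align 8
  ; Mask <0,1,0,1> repeats both lanes of the first operand; the second
  ; operand is unused, so poison is the idiomatic placeholder.
  %quad = shufflevector <2 x i64> %pair, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  store <4 x i64> %quad, ptr %st, align 8
  ret void
}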

76 changes: 34 additions & 42 deletions llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,93 +5,85 @@ define i32 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[T:%.*]] = alloca <8 x i32>, align 32
; CHECK-NEXT: [[T1:%.*]] = bitcast <8 x i32>* [[T]] to [8 x i32]*
; CHECK-NEXT: [[T2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[T3:%.*]] = bitcast <8 x i32>* [[T]] to i8*
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds <8 x i32>, <8 x i32>* [[T]], i64 0, i64 0
; CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 1
; CHECK-NEXT: [[T6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 2
; CHECK-NEXT: [[T7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 3
; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 4
; CHECK-NEXT: [[T9:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 6
; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 5
; CHECK-NEXT: [[T11:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[T1]], i64 0, i64 7
; CHECK-NEXT: store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, <8 x i32>* [[T]], align 32
; CHECK-NEXT: [[T12:%.*]] = bitcast i32* [[T2]] to i8*
; CHECK-NEXT: [[T13:%.*]] = load i32, i32* [[T4]], align 32
; CHECK-NEXT: [[T14:%.*]] = load i32, i32* [[T5]], align 4
; CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 1
; CHECK-NEXT: [[T6:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 2
; CHECK-NEXT: [[T7:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 3
; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 4
; CHECK-NEXT: [[T9:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 6
; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 5
; CHECK-NEXT: [[T11:%.*]] = getelementptr inbounds [8 x i32], ptr [[T]], i64 0, i64 7
; CHECK-NEXT: store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, ptr [[T]], align 32
; CHECK-NEXT: [[T13:%.*]] = load i32, ptr [[T]], align 32
; CHECK-NEXT: [[T14:%.*]] = load i32, ptr [[T5]], align 4
; CHECK-NEXT: [[T15:%.*]] = icmp slt i32 [[T14]], [[T13]]
; CHECK-NEXT: [[T16:%.*]] = select i1 [[T15]], i32 [[T14]], i32 [[T13]]
; CHECK-NEXT: [[T17:%.*]] = zext i1 [[T15]] to i32
; CHECK-NEXT: [[T18:%.*]] = load i32, i32* [[T6]], align 8
; CHECK-NEXT: [[T18:%.*]] = load i32, ptr [[T6]], align 8
; CHECK-NEXT: [[T19:%.*]] = icmp slt i32 [[T18]], [[T16]]
; CHECK-NEXT: [[T20:%.*]] = select i1 [[T19]], i32 [[T18]], i32 [[T16]]
; CHECK-NEXT: [[T21:%.*]] = select i1 [[T19]], i32 2, i32 [[T16]]
; CHECK-NEXT: [[T22:%.*]] = load i32, i32* [[T7]], align 4
; CHECK-NEXT: [[T22:%.*]] = load i32, ptr [[T7]], align 4
; CHECK-NEXT: [[T23:%.*]] = icmp slt i32 [[T22]], [[T20]]
; CHECK-NEXT: [[T24:%.*]] = select i1 [[T23]], i32 [[T22]], i32 [[T20]]
; CHECK-NEXT: [[T25:%.*]] = select i1 [[T23]], i32 3, i32 [[T21]]
; CHECK-NEXT: [[T26:%.*]] = load i32, i32* [[T8]], align 16
; CHECK-NEXT: [[T26:%.*]] = load i32, ptr [[T8]], align 16
; CHECK-NEXT: [[T27:%.*]] = icmp slt i32 [[T26]], [[T24]]
; CHECK-NEXT: [[T28:%.*]] = select i1 [[T27]], i32 [[T26]], i32 [[T24]]
; CHECK-NEXT: [[T29:%.*]] = select i1 [[T27]], i32 4, i32 [[T25]]
; CHECK-NEXT: [[T30:%.*]] = load i32, i32* [[T10]], align 4
; CHECK-NEXT: [[T30:%.*]] = load i32, ptr [[T10]], align 4
; CHECK-NEXT: [[T31:%.*]] = icmp slt i32 [[T30]], [[T28]]
; CHECK-NEXT: [[T32:%.*]] = select i1 [[T31]], i32 [[T30]], i32 [[T28]]
; CHECK-NEXT: [[T33:%.*]] = select i1 [[T31]], i32 5, i32 [[T29]]
; CHECK-NEXT: [[T34:%.*]] = load i32, i32* [[T9]], align 8
; CHECK-NEXT: [[T34:%.*]] = load i32, ptr [[T9]], align 8
; CHECK-NEXT: [[T35:%.*]] = icmp slt i32 [[T34]], [[T32]]
; CHECK-NEXT: [[T36:%.*]] = select i1 [[T35]], i32 [[T34]], i32 [[T32]]
; CHECK-NEXT: [[T37:%.*]] = select i1 [[T35]], i32 6, i32 [[T33]]
; CHECK-NEXT: [[T38:%.*]] = load i32, i32* [[T11]], align 4
; CHECK-NEXT: [[T38:%.*]] = load i32, ptr [[T11]], align 4
; CHECK-NEXT: [[T39:%.*]] = icmp slt i32 [[T38]], [[T36]]
; CHECK-NEXT: [[T40:%.*]] = select i1 [[T39]], i32 7, i32 [[T37]]
; CHECK-NEXT: store i32 [[T40]], i32* [[T2]], align 4
; CHECK-NEXT: store i32 [[T40]], ptr [[T2]], align 4
; CHECK-NEXT: ret i32 0
;
bb:
%t = alloca <8 x i32>, align 32
%t1 = bitcast <8 x i32>* %t to [8 x i32]*
%t2 = alloca i32, align 4
%t3 = bitcast <8 x i32>* %t to i8*
%t4 = getelementptr inbounds <8 x i32>, <8 x i32>* %t, i64 0, i64 0
%t5 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 1
%t6 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 2
%t7 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 3
%t8 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 4
%t9 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 6
%t10 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 5
%t11 = getelementptr inbounds [8 x i32], [8 x i32]* %t1, i64 0, i64 7
store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, <8 x i32>* %t, align 32
%t12 = bitcast i32* %t2 to i8*
%t13 = load i32, i32* %t4, align 32
%t14 = load i32, i32* %t5, align 4
%t5 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 1
%t6 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 2
%t7 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 3
%t8 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 4
%t9 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 6
%t10 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 5
%t11 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 7
store <8 x i32> <i32 -221320154, i32 -756426931, i32 563883532, i32 382683935, i32 144890241, i32 -1052877364, i32 -1052877364, i32 -1016007675>, ptr %t, align 32
%t13 = load i32, ptr %t, align 32
%t14 = load i32, ptr %t5, align 4
%t15 = icmp slt i32 %t14, %t13
%t16 = select i1 %t15, i32 %t14, i32 %t13
%t17 = zext i1 %t15 to i32
%t18 = load i32, i32* %t6, align 8
%t18 = load i32, ptr %t6, align 8
%t19 = icmp slt i32 %t18, %t16
%t20 = select i1 %t19, i32 %t18, i32 %t16
%t21 = select i1 %t19, i32 2, i32 %t16
%t22 = load i32, i32* %t7, align 4
%t22 = load i32, ptr %t7, align 4
%t23 = icmp slt i32 %t22, %t20
%t24 = select i1 %t23, i32 %t22, i32 %t20
%t25 = select i1 %t23, i32 3, i32 %t21
%t26 = load i32, i32* %t8, align 16
%t26 = load i32, ptr %t8, align 16
%t27 = icmp slt i32 %t26, %t24
%t28 = select i1 %t27, i32 %t26, i32 %t24
%t29 = select i1 %t27, i32 4, i32 %t25
%t30 = load i32, i32* %t10, align 4
%t30 = load i32, ptr %t10, align 4
%t31 = icmp slt i32 %t30, %t28
%t32 = select i1 %t31, i32 %t30, i32 %t28
%t33 = select i1 %t31, i32 5, i32 %t29
%t34 = load i32, i32* %t9, align 8
%t34 = load i32, ptr %t9, align 8
%t35 = icmp slt i32 %t34, %t32
%t36 = select i1 %t35, i32 %t34, i32 %t32
%t37 = select i1 %t35, i32 6, i32 %t33
%t38 = load i32, i32* %t11, align 4
%t38 = load i32, ptr %t11, align 4
%t39 = icmp slt i32 %t38, %t36
%t40 = select i1 %t39, i32 7, i32 %t37
store i32 %t40, i32* %t2, align 4
store i32 %t40, ptr %t2, align 4
ret i32 0
}
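
PR34635.ll is the one diff here that deletes instructions outright rather than retyping them: the [8 x i32]* and i8* bitcasts of the alloca become dead, because an alloca now yields a plain ptr that every GEP may index with whatever source element type it names. A minimal sketch of that mixed-view pattern (illustrative names, not from the test):

define i32 @mixed_views() {
  %t = alloca <8 x i32>, align 32
  ; One untyped address; the store views it as a vector...
  store <8 x i32> zeroinitializer, ptr %t, align 32
  ; ...and the GEP views the same address as an array, with no bitcast.
  %e1 = getelementptr inbounds [8 x i32], ptr %t, i64 0, i64 1
  %v = load i32, ptr %e1, align 4
  ret i32 %v
}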
23 changes: 11 additions & 12 deletions llvm/test/Transforms/SLPVectorizer/X86/PR35628_1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
; RUN: opt -passes=slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"

define void @mainTest(i32* %ptr) #0 {
define void @mainTest(ptr %ptr) #0 {
; CHECK-LABEL: @mainTest(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[PTR:%.*]], null
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
Expand All @@ -26,27 +25,27 @@ define void @mainTest(i32* %ptr) #0 {
; CHECK-NEXT: ret void
;
entry:
%cmp = icmp eq i32* %ptr, null
%cmp = icmp eq ptr %ptr, null
br i1 %cmp, label %loop, label %bail_out

loop:
%dummy_phi = phi i32 [ 1, %entry ], [ %18, %loop ]
%0 = load i32, i32 * %ptr , align 4
%0 = load i32, ptr %ptr , align 4
%1 = mul i32 %0, %0
%2 = add i32 1, %1
%3 = getelementptr inbounds i32, i32 * %ptr, i64 1
%4 = load i32, i32 * %3 , align 4
%3 = getelementptr inbounds i32, ptr %ptr, i64 1
%4 = load i32, ptr %3 , align 4
%5 = mul i32 %4, %4
%6 = add i32 %2, %4
%7 = add i32 %6, %5
%8 = getelementptr inbounds i32, i32 *%ptr, i64 2
%9 = load i32, i32 * %8 , align 4
%8 = getelementptr inbounds i32, ptr %ptr, i64 2
%9 = load i32, ptr %8 , align 4
%10 = mul i32 %9, %9
%11 = add i32 %7, %9
%12 = add i32 %11, %10
%13 = sext i32 %9 to i64
%14 = getelementptr inbounds i32, i32 *%ptr, i64 3
%15 = load i32, i32 * %14 , align 4
%14 = getelementptr inbounds i32, ptr %ptr, i64 3
%15 = load i32, ptr %14 , align 4
%16 = mul i32 %15, %15
%17 = add i32 %12, %15
%18 = add i32 %17, %16
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/Transforms/SLPVectorizer/X86/PR35777.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
define { i64, i64 } @patatino(double %arg) {
; CHECK-LABEL: @patatino(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr @global, align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 2), align 16
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[ARG:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP0]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = fptosi <2 x double> [[TMP6]] to <2 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i32> [[TMP7]] to <2 x i64>
Expand All @@ -23,19 +23,19 @@ define { i64, i64 } @patatino(double %arg) {
; CHECK-NEXT: ret { i64, i64 } [[T17]]
;
bb:
%t = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
%t1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
%t = load double, ptr @global, align 16
%t1 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 2), align 16
%t2 = fmul double %t1, %arg
%t3 = fadd double %t, %t2
%t4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
%t4 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 4), align 16
%t5 = fadd double %t4, %t3
%t6 = fptosi double %t5 to i32
%t7 = sext i32 %t6 to i64
%t8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
%t9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
%t8 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 1), align 8
%t9 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 3), align 8
%t10 = fmul double %t9, %arg
%t11 = fadd double %t8, %t10
%t12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
%t12 = load double, ptr getelementptr inbounds ([6 x double], ptr @global, i64 0, i64 5), align 8
%t13 = fadd double %t12, %t11
%t14 = fptosi double %t13 to i32
%t15 = sext i32 %t14 to i64
Expand Down