-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LV] Pre-commit test for #128062 #164801
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[LV] Pre-commit test for #128062 #164801
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) Changes: In preparation for extending the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062. Patch is 24.22 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/164801.diff 1 File Affected:
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 5c62ca3ff3d01..9a655bba280e4 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -700,3 +700,359 @@ exit:
%result = add i64 %cast.ptr, %0
ret i64 %result
}
+
+; FIXME: Unprofitable vectorization.
+; Should be smarter about handling interleave groups.
+define void @pr128062(ptr %dst.start, i64 %n, i8 %a) {
+; DEFAULT-LABEL: @pr128062(
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; DEFAULT-NEXT: [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; DEFAULT-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; DEFAULT-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT: vector.ph:
+; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
+; DEFAULT: vector.body:
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; DEFAULT-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; DEFAULT-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; DEFAULT-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; DEFAULT-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; DEFAULT-NEXT: [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT: [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT: [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT: [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT: [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; DEFAULT-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; DEFAULT-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; DEFAULT-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; DEFAULT-NEXT: [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; DEFAULT-NEXT: [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT: store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; DEFAULT-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; DEFAULT-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; DEFAULT-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; DEFAULT-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; DEFAULT-NEXT: [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; DEFAULT-NEXT: [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; DEFAULT-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; DEFAULT-NEXT: [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; DEFAULT-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP45]], ptr [[TMP28]], align 1
+; DEFAULT-NEXT: store i8 [[TMP46]], ptr [[TMP29]], align 1
+; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP30]], align 1
+; DEFAULT-NEXT: store i8 [[TMP48]], ptr [[TMP31]], align 1
+; DEFAULT-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; DEFAULT-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; DEFAULT-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; DEFAULT-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; DEFAULT-NEXT: [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; DEFAULT-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; DEFAULT-NEXT: [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; DEFAULT-NEXT: [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; DEFAULT-NEXT: [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; DEFAULT-NEXT: [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; DEFAULT-NEXT: [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; DEFAULT-NEXT: [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP66]], ptr [[TMP49]], align 1
+; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP50]], align 1
+; DEFAULT-NEXT: store i8 [[TMP68]], ptr [[TMP51]], align 1
+; DEFAULT-NEXT: store i8 [[TMP69]], ptr [[TMP52]], align 1
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; DEFAULT: middle.block:
+; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; DEFAULT: scalar.ph:
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; DEFAULT-NEXT: br label [[LOOP:%.*]]
+; DEFAULT: loop:
+; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT: [[DST:%.*]] = phi ptr [ [[DST_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT: [[DST_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
+; DEFAULT-NEXT: [[LOAD_DST:%.*]] = load i8, ptr [[DST]], align 1
+; DEFAULT-NEXT: [[DST_EXT:%.*]] = zext i8 [[LOAD_DST]] to i16
+; DEFAULT-NEXT: [[MUL_DST_0:%.*]] = mul nuw i16 [[DST_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_0:%.*]] = udiv i16 [[MUL_DST_0]], 255
+; DEFAULT-NEXT: [[TRUNC_0:%.*]] = trunc nuw i16 [[UDIV_0]] to i8
+; DEFAULT-NEXT: [[VAL_0:%.*]] = add i8 [[A]], [[TRUNC_0]]
+; DEFAULT-NEXT: store i8 [[VAL_0]], ptr [[DST]], align 1
+; DEFAULT-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
+; DEFAULT-NEXT: [[LOAD_DST_1:%.*]] = load i8, ptr [[GEP_DST_1]], align 1
+; DEFAULT-NEXT: [[DST_1_EXT:%.*]] = zext i8 [[LOAD_DST_1]] to i16
+; DEFAULT-NEXT: [[MUL_DST_1:%.*]] = mul nuw i16 [[DST_1_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_1:%.*]] = udiv i16 [[MUL_DST_1]], 255
+; DEFAULT-NEXT: [[TRUNC_1:%.*]] = trunc nuw i16 [[UDIV_1]] to i8
+; DEFAULT-NEXT: [[VAL_1:%.*]] = add i8 [[A]], [[TRUNC_1]]
+; DEFAULT-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
+; DEFAULT-NEXT: [[LOAD_DST_2:%.*]] = load i8, ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT: [[DST_2_EXT:%.*]] = zext i8 [[LOAD_DST_2]] to i16
+; DEFAULT-NEXT: [[MUL_DST_2:%.*]] = mul nuw i16 [[DST_2_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_2:%.*]] = udiv i16 [[MUL_DST_2]], 255
+; DEFAULT-NEXT: [[TRUNC_2:%.*]] = trunc nuw i16 [[UDIV_2]] to i8
+; DEFAULT-NEXT: [[VAL_2:%.*]] = add i8 [[A]], [[TRUNC_2]]
+; DEFAULT-NEXT: store i8 [[VAL_2]], ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
+; DEFAULT-NEXT: [[LOAD_DST_3:%.*]] = load i8, ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT: [[DST_3_EXT:%.*]] = zext i8 [[LOAD_DST_3]] to i16
+; DEFAULT-NEXT: [[MUL_DST_3:%.*]] = mul nuw i16 [[DST_3_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_3:%.*]] = udiv i16 [[MUL_DST_3]], 255
+; DEFAULT-NEXT: [[TRUNC_3:%.*]] = trunc nuw i16 [[UDIV_3]] to i8
+; DEFAULT-NEXT: [[VAL_3:%.*]] = add i8 [[A]], [[TRUNC_3]]
+; DEFAULT-NEXT: store i8 [[VAL_3]], ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], -4
+; DEFAULT-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; DEFAULT-NEXT: br i1 [[EXIT_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; DEFAULT: exit:
+; DEFAULT-NEXT: ret void
+;
+; STRIDED-LABEL: @pr128062(
+; STRIDED-NEXT: entry:
+; STRIDED-NEXT: [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; STRIDED-NEXT: [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; STRIDED-NEXT: [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; STRIDED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; STRIDED-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; STRIDED: vector.ph:
+; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; STRIDED-NEXT: [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; STRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
+; STRIDED: vector.body:
+; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; STRIDED-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; STRIDED-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; STRIDED-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; STRIDED-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; STRIDED-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; STRIDED-NEXT: [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT: [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT: [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT: [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT: [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; STRIDED-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; STRIDED-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; STRIDED-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; STRIDED-NEXT: [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; STRIDED-NEXT: [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; STRIDED-NEXT: [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; STRIDED-NEXT: [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; STRIDED-NEXT: [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; STRIDED-NEXT: [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; STRIDED-NEXT: [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; STRIDED-NEXT: store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT: store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; STRIDED-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; STRIDED-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; STRIDED-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; STRIDED-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; STRIDED-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; STRIDED-NEXT: [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; STRIDED-NEXT: [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; STRIDED-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; STRIDED-NEXT: [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; STRIDED-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; STRIDED-NEXT: [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; STRIDED-NEXT: [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; STRIDED-NEXT: [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; STRIDED-NEXT: [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; STRIDED-NEXT: [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; STRIDED-NEXT: [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; STRIDED-NEXT: [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; STRIDED-NEXT: store i8 [[TMP45]], ptr [[TMP28]], align 1
+; STRIDED-NEXT: store i8 [[TMP46]], ptr [[TMP29]], align 1
+; STRIDED-NEXT: store i8 [[TMP47]], ptr [[TMP30]], align 1
+; STRIDED-NEXT: store i8 [[TMP48]], ptr [[TMP31]], align 1
+; STRIDED-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; STRIDED-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; STRIDED-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; STRIDED-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; STRIDED-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; STRIDED-NEXT: [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; STRIDED-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; STRIDED-NEXT: [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; STRIDED-NEXT: [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; STRIDED-NEXT: [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; STRIDED-NEXT: [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; STRIDED-NEXT: [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; STRIDED-NEXT: [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; STRIDED-NEXT: [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; STRIDED-NEXT: [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; STRIDED-NEXT: [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; STRIDED-NEXT: [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; STRIDED-NEXT: [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; STRIDED-NEXT: [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; STRIDED-NEXT: store i8 [[TMP66]], ptr [[TMP49]], align 1
+; STRIDED-NEXT: store i8 [[TMP67]], ptr [[TMP50]], align 1
+; STRIDED-NEXT: store i8 [[TMP68]], ptr [[TMP51]], align 1
+; STRIDED-NEXT: store i8 [[TMP69]], ptr [[TMP52]], align 1
+; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; STRIDED: middle.block:
+; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; STRIDED: scalar.ph:
+; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; STRIDED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; STRIDED-NEXT: br label [[LOOP:%.*]]
+; STRIDED: loop:
+; STRIDED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; STRIDED...
[truncated]
|
c46a9cd to
73eb659
Compare
llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/LoopVectorize/pr128062-interleaved-accesses-narrow-group.ll
Outdated
Show resolved
Hide resolved
In preparation for extending the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from llvm#128062.
73eb659 to
456948d
Compare
|
Gentle ping. |
|
Gentle ping. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you check this works as expected? I think to trigger narrowInterleaveGroups, a target-specific test is needed, as it checks the size of the vector against the target vector size.
I got a diff with #164839, although that PR needs more thought? |
In preparation for extending the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062.