From e6bb662fa8e4639a315226d0a5d90dfd17162e16 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim
Date: Wed, 24 Sep 2025 09:04:41 -0700
Subject: [PATCH 1/2] [SLPVectorizer] A test for widening constant strided loads.

Precommit a test.
---
 .../RISCV/basic-strided-loads.ll              | 111 ++++++++++++++++--
 1 file changed, 101 insertions(+), 10 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
index 4f52227c6511e..85610bd293f55 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
@@ -527,15 +527,6 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
   ret void
 }
 
-; TODO: We want to generate this code:
-; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
-;   %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
-;   %strided_load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 8, <4 x i1> splat (i1 true), i32 4)
-;   %bitcast_ = bitcast <4 x i32> %strided_load to <16 x i8>
-;   store <16 x i8> %bitcast_, ptr %gep_s0, align 1
-;   ret void
-; }
 define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
 ; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
 ; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
@@ -545,7 +536,6 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32>
 ; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
 ; CHECK-NEXT:    ret void
-;
   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
   %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
   %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
@@ -617,6 +607,107 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
   ret void
 }
 
+; TODO: We want to generate this code:
+; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) #0 {
+;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+;   %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+;   %1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
+;   %2 = bitcast <4 x i32> %1 to <16 x i8>
+;   store <16 x i8> %2, ptr %gep_s0, align 1
+;   ret void
+; }
+define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
+; CHECK-NEXT:    [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 100
+; CHECK-NEXT:    [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 200
+; CHECK-NEXT:    [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 300
+; CHECK-NEXT:    [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32>
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32>
+; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
+; CHECK-NEXT:    ret void
+;
+  %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+  %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
+  %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
+  %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
+  %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 100
+  %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 101
+  %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 102
+  %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 103
+  %gep_l8 = getelementptr inbounds i8, ptr %pl, i64 200
+  %gep_l9 = getelementptr inbounds i8, ptr %pl, i64 201
+  %gep_l10 = getelementptr inbounds i8, ptr %pl, i64 202
+  %gep_l11 = getelementptr inbounds i8, ptr %pl, i64 203
+  %gep_l12 = getelementptr inbounds i8, ptr %pl, i64 300
+  %gep_l13 = getelementptr inbounds i8, ptr %pl, i64 301
+  %gep_l14 = getelementptr inbounds i8, ptr %pl, i64 302
+  %gep_l15 = getelementptr inbounds i8, ptr %pl, i64 303
+
+  %load0 = load i8, ptr %gep_l0 , align 1
+  %load1 = load i8, ptr %gep_l1 , align 1
+  %load2 = load i8, ptr %gep_l2 , align 1
+  %load3 = load i8, ptr %gep_l3 , align 1
+  %load4 = load i8, ptr %gep_l4 , align 1
+  %load5 = load i8, ptr %gep_l5 , align 1
+  %load6 = load i8, ptr %gep_l6 , align 1
+  %load7 = load i8, ptr %gep_l7 , align 1
+  %load8 = load i8, ptr %gep_l8 , align 1
+  %load9 = load i8, ptr %gep_l9 , align 1
+  %load10 = load i8, ptr %gep_l10, align 1
+  %load11 = load i8, ptr %gep_l11, align 1
+  %load12 = load i8, ptr %gep_l12, align 1
+  %load13 = load i8, ptr %gep_l13, align 1
+  %load14 = load i8, ptr %gep_l14, align 1
+  %load15 = load i8, ptr %gep_l15, align 1
+
+  %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+  %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
+  %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
+  %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
+  %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
+  %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
+  %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
+  %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
+  %gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
+  %gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
+  %gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
+  %gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
+  %gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
+  %gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
+  %gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
+  %gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15
+
+  store i8 %load0, ptr %gep_s0, align 1
+  store i8 %load1, ptr %gep_s1, align 1
+  store i8 %load2, ptr %gep_s2, align 1
+  store i8 %load3, ptr %gep_s3, align 1
+  store i8 %load4, ptr %gep_s4, align 1
+  store i8 %load5, ptr %gep_s5, align 1
+  store i8 %load6, ptr %gep_s6, align 1
+  store i8 %load7, ptr %gep_s7, align 1
+  store i8 %load8, ptr %gep_s8, align 1
+  store i8 %load9, ptr %gep_s9, align 1
+  store i8 %load10, ptr %gep_s10, align 1
+  store i8 %load11, ptr %gep_s11, align 1
+  store i8 %load12, ptr %gep_s12, align 1
+  store i8 %load13, ptr %gep_s13, align 1
+  store i8 %load14, ptr %gep_s14, align 1
+  store i8 %load15, ptr %gep_s15, align 1
+
+  ret void
+}
 ; TODO: We want to generate this code:
 ; define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
 ;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0

From 6321dcdc6d90e5c5f958198740df5cd6d7a49a79 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim
Date: Tue, 30 Sep 2025 08:33:47 -0700
Subject: [PATCH 2/2] Rename the old constant_stride_widen_no_reordering to
 constant_stride_masked_no_reordering

---
 .../SLPVectorizer/RISCV/basic-strided-loads.ll | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
index 85610bd293f55..02e05b2e4138a 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/basic-strided-loads.ll
@@ -527,15 +527,16 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
   ret void
 }
 
-define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
-; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
+define void @constant_stride_masked_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
+; CHECK-LABEL: define void @constant_stride_masked_no_reordering(
 ; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
 ; CHECK-NEXT:    [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <28 x i8> @llvm.masked.load.v28i8.p0(ptr [[GEP_L0]], i32 1, <28 x i1> , <28 x i8> poison)
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32>
-; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32>
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 1
 ; CHECK-NEXT:    ret void
+;
   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
   %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
   %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2