From c6c49691aef869df4d4f2482ad2211c2e4ace1d8 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 20 Oct 2025 09:52:17 +0800 Subject: [PATCH 1/3] [VPlan] Set flags when constructing zexts using VPWidenCastRecipe createWidenCast doesn't set the flag type, so when we simplify trunc (zext nneg x) -> zext x we would hit an assertion in CSE that the flag types don't match with other VPWidenCastRecipes that weren't simplified. This fixes it the same way trunc flags are handled too. As an aside I think it should be correct to preserve the nneg flag in this case since the input operand is still non-negative after the transform. But that's left to another PR. --- .../Vectorize/LoopVectorizationPlanner.h | 2 + .../Transforms/LoopVectorize/cse-casts.ll | 63 ++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 7651ba16b35cf..3fed003282f2b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -325,6 +325,8 @@ class VPBuilder { VPIRFlags Flags; if (Opcode == Instruction::Trunc) Flags = VPIRFlags::TruncFlagsTy(false, false); + else if (Opcode == Instruction::ZExt) + Flags = VPIRFlags::NonNegFlagsTy(false); return tryInsertInstruction( new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags)); } diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index e923560bb77e8..b8ce35dc704d2 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -319,8 +319,9 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa ; CHECK: [[PRED_STORE_CONTINUE60]]: ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -349,3 +350,61 @@ loop: exit: ret void } + +define void @simplified_cast_preserves_irflag_type(ptr %p, ptr %q, ptr %r) { +; CHECK-LABEL: define void @simplified_cast_preserves_irflag_type( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 2 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[R]], i64 2 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[R]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[P]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] +; CHECK-NEXT: [[BOUND06:%.*]] = icmp ult ptr [[R]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND17:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]] +; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX9]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META4:![0-9]+]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +; CHECK-NEXT: store i16 [[TMP2]], ptr [[Q]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META9:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP2]], ptr [[R]], align 2, !alias.scope [[META11:![0-9]+]], !noalias [[META4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %x = load i8, ptr %p + %x.i32 = zext i8 %x to i32 + %trunc = trunc i32 %x.i32 to i16 + store i16 %trunc, ptr %q + %x.i16 = zext i8 %x to i16 + store i16 %x.i16, ptr %r + %iv.next = add i64 %iv, 2 + %done = icmp eq i64 %iv.next, 0 + br i1 %done, label %exit, label %loop + +exit: + ret void +} From d15fc606a51e837a116831ae259195331ff905f8 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 20 Oct 2025 17:36:03 +0800 Subject: [PATCH 2/3] Add noalias --- .../Transforms/LoopVectorize/cse-casts.ll | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index b8ce35dc704d2..5a118a55227d2 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -351,44 +351,29 @@ exit: ret void } -define void @simplified_cast_preserves_irflag_type(ptr %p, ptr %q, ptr %r) { +define void @simplified_cast_preserves_irflag_type(ptr noalias %p, ptr noalias %q, ptr noalias %r) { ; CHECK-LABEL: define void @simplified_cast_preserves_irflag_type( -; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]]) { +; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], ptr noalias [[R:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] -; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 2 -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[R]], i64 2 -; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 1 -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP1]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[R]], [[SCEVGEP]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[Q]], [[SCEVGEP2]] -; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[P]], [[SCEVGEP]] -; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] -; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] -; CHECK-NEXT: [[BOUND06:%.*]] = icmp ult ptr [[R]], [[SCEVGEP2]] -; CHECK-NEXT: [[BOUND17:%.*]] = icmp ult ptr [[P]], [[SCEVGEP1]] -; CHECK-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]] -; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX9]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META4:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[P]], align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 -; CHECK-NEXT: store i16 [[TMP2]], ptr [[Q]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META9:![0-9]+]] -; CHECK-NEXT: store i16 [[TMP2]], ptr [[R]], align 2, !alias.scope [[META11:![0-9]+]], !noalias [[META4]] +; CHECK-NEXT: store i16 [[TMP2]], ptr [[Q]], align 2 +; CHECK-NEXT: store i16 [[TMP2]], ptr [[R]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 -; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop From 5e8aa980ed653ae6d9b9fc6eab32c698415ab3af Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 20 Oct 2025 17:58:29 +0800 Subject: [PATCH 3/3] Update exit condition --- llvm/test/Transforms/LoopVectorize/cse-casts.ll | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index 5a118a55227d2..fb45745eff1cb 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -368,12 +368,11 @@ define void @simplified_cast_preserves_irflag_type(ptr noalias %p, ptr noalias % ; CHECK-NEXT: store i16 [[TMP2]], ptr [[Q]], align 2 ; CHECK-NEXT: store i16 [[TMP2]], ptr [[R]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: ; entry: br label %loop @@ -387,8 +386,8 @@ loop: %x.i16 = zext i8 %x to i16 store i16 %x.i16, ptr %r %iv.next = add i64 %iv, 2 - %done = icmp eq i64 %iv.next, 0 - br i1 %done, label %exit, label %loop + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop exit: ret void