From f8a80b12ddaea0a47577edc163f68f9f7dc94b3e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Wed, 12 Nov 2025 14:57:18 +0800
Subject: [PATCH 1/5] [VPlan] Handle ExplicitVectorLength in
 opcodeMayReadOrWriteFromMemory

This has no effect for now from what I can tell but is needed if we ever want to extend narrowInterleaveGroups to handle EVL tail folded loops.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 80cd112dbcd8..488470d24796 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1259,6 +1259,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::ExtractLastLanePerPart:
   case VPInstruction::ExtractPenultimateElement:
   case VPInstruction::ActiveLaneMask:
+  case VPInstruction::ExplicitVectorLength:
   case VPInstruction::FirstActiveLane:
   case VPInstruction::FirstOrderRecurrenceSplice:
   case VPInstruction::LogicalAnd:
---
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 80cd112dbcd8a..488470d247968 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1259,6 +1259,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::ExtractLastLanePerPart:
   case VPInstruction::ExtractPenultimateElement:
   case VPInstruction::ActiveLaneMask:
+  case VPInstruction::ExplicitVectorLength:
   case VPInstruction::FirstActiveLane:
   case VPInstruction::FirstOrderRecurrenceSplice:
   case VPInstruction::LogicalAnd:

From 6bb2fe095f6695b08eb6216cc6219ad4cd5aa8ff Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Wed, 12 Nov 2025 20:57:13 +0800
Subject: [PATCH 2/5] Simplify known EVLs

---
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 30 +++++++++++++++++++
 .../LoopVectorize/RISCV/low-trip-count.ll     | 28 ++++++++---------
 ...ctor-loop-backedge-elimination-with-evl.ll |  3 +-
 3 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 48bd697397f41..e540498acb8b8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1816,6 +1816,35 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
   return true;
 }
 
+/// From the definition of llvm.experimental.get.vector.length,
+/// VPInstruction::ExplicitVectorLength(%AVL) = %AVL when %AVL <= VF.
+static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
+                             PredicatedScalarEvolution &PSE) {
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_deep(Plan.getEntry()))) {
+    for (VPRecipeBase &R : *VPBB) {
+      VPValue *AVL;
+      if (!match(&R, m_EVL(m_VPValue(AVL))))
+        continue;
+
+      const SCEV *AVLSCEV = vputils::getSCEVExprForVPValue(AVL, *PSE.getSE());
+      if (isa<SCEVCouldNotCompute>(AVLSCEV))
+        continue;
+      const SCEV *VFSCEV = PSE.getSE()->getElementCount(AVLSCEV->getType(), VF);
+      if (!PSE.getSE()->isKnownPredicate(CmpInst::ICMP_ULE, AVLSCEV, VFSCEV))
+        continue;
+
+      VPBuilder Builder(&R);
+      VPValue *AVLZExt = Builder.createScalarZExtOrTrunc(
+          AVL, Type::getInt32Ty(Plan.getContext()), AVLSCEV->getType(),
+          R.getDebugLoc());
+      R.getVPSingleValue()->replaceAllUsesWith(AVLZExt);
+      return true;
+    }
+  }
+  return false;
+}
+
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                          unsigned BestUF,
                                          PredicatedScalarEvolution &PSE) {
@@ -1825,6 +1854,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   bool MadeChange = tryToReplaceALMWithWideALM(Plan, BestVF, BestUF);
   MadeChange |= simplifyBranchConditionForVFAndUF(Plan, BestVF, BestUF, PSE);
   MadeChange |= optimizeVectorInductionWidthForTCAndVFUF(Plan, BestVF, BestUF);
+  MadeChange |= simplifyKnownEVL(Plan, BestVF, PSE);
 
   if (MadeChange) {
     Plan.setVF(BestVF);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index 8ef53cade01ac..83e044e2b4a78 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -125,12 +125,11 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 8, i32 4, i1 true)
-; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP9:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 4 x i1> splat (i1 true), i32 8)
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl <vscale x 4 x i8> [[VP_OP_LOAD]], splat (i8 1)
-; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP12:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr align 1 [[DST:%.*]], <vscale x 4 x i1> splat (i1 true), i32 8)
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 4 x i8> [[TMP6]], [[VP_OP_LOAD1]]
-; CHECK-NEXT:    call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 4 x i1> splat (i1 true), i32 8)
 ; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -164,12 +163,11 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 16, i32 8, i1 true)
-; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP1:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 8 x i1> splat (i1 true), i32 16)
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl <vscale x 8 x i8> [[VP_OP_LOAD]], splat (i8 1)
-; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP4:%.*]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[DST:%.*]], <vscale x 8 x i1> splat (i1 true), i32 16)
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 8 x i8> [[TMP6]], [[VP_OP_LOAD1]]
-; CHECK-NEXT:    call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[TMP4]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[DST]], <vscale x 8 x i1> splat (i1 true), i32 16)
 ; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -204,12 +202,11 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 32, i32 16, i1 true)
-; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP1:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 32)
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
-; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP4:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 32)
 ; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 16 x i8> [[TMP6]], [[VP_OP_LOAD1]]
-; CHECK-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP11]], ptr align 1 [[TMP4]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP11]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 32)
 ; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -243,12 +240,11 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 24, i32 16, i1 true)
-; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[SRC:%.*]], <vscale x 16 x i1> splat (i1 true), i32 24)
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl <vscale x 16 x i8> [[VP_OP_LOAD]], splat (i8 1)
-; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[DST:%.*]], <vscale x 16 x i1> splat (i1 true), i32 24)
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 16 x i8> [[TMP6]], [[VP_OP_LOAD1]]
-; CHECK-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 24)
 ; CHECK-NEXT:    br label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
index 1676461863583..f1dda3d5a2f91 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll
@@ -10,8 +10,7 @@ define void @foo(ptr %arg) #0 {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 3, i32 2, i1 true)
-; CHECK-NEXT:    call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ARG]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> zeroinitializer, ptr align 8 [[ARG]], <vscale x 2 x i1> splat (i1 true), i32 3)
 ; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[EXIT:.*]]

From a4aa19e31f28421f51d9ee4c0765125c53c94eef Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Wed, 12 Nov 2025 21:05:17 +0800
Subject: [PATCH 3/5] Rename ZExt -> Trunc to reflect that we're casting from
 i64 -> i32

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e540498acb8b8..8cdc859267cf3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1835,10 +1835,10 @@ static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
         continue;
 
       VPBuilder Builder(&R);
-      VPValue *AVLZExt = Builder.createScalarZExtOrTrunc(
+      VPValue *Trunc = Builder.createScalarZExtOrTrunc(
           AVL, Type::getInt32Ty(Plan.getContext()), AVLSCEV->getType(),
           R.getDebugLoc());
-      R.getVPSingleValue()->replaceAllUsesWith(AVLZExt);
+      R.getVPSingleValue()->replaceAllUsesWith(Trunc);
       return true;
     }
   }

From 027c4623ee09c54005d9647234d145e86f9afeb9 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Thu, 13 Nov 2025 00:17:38 +0800
Subject: [PATCH 4/5] Store SE in var

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8cdc859267cf3..2a9c9b5495bcc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1827,11 +1827,12 @@ static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
       if (!match(&R, m_EVL(m_VPValue(AVL))))
         continue;
 
-      const SCEV *AVLSCEV = vputils::getSCEVExprForVPValue(AVL, *PSE.getSE());
+      ScalarEvolution &SE = *PSE.getSE();
+      const SCEV *AVLSCEV = vputils::getSCEVExprForVPValue(AVL, SE);
       if (isa<SCEVCouldNotCompute>(AVLSCEV))
         continue;
-      const SCEV *VFSCEV = PSE.getSE()->getElementCount(AVLSCEV->getType(), VF);
-      if (!PSE.getSE()->isKnownPredicate(CmpInst::ICMP_ULE, AVLSCEV, VFSCEV))
+      const SCEV *VFSCEV = SE.getElementCount(AVLSCEV->getType(), VF);
+      if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, AVLSCEV, VFSCEV))
         continue;
 
       VPBuilder Builder(&R);

From fa1844c86aade7dd7a81a33b47e36e26e3386242 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Thu, 13 Nov 2025 00:19:17 +0800
Subject: [PATCH 5/5] Remove single use builder var

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2a9c9b5495bcc..9ea74d58a67e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1835,8 +1835,7 @@ static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF,
       if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, AVLSCEV, VFSCEV))
         continue;
 
-      VPBuilder Builder(&R);
-      VPValue *Trunc = Builder.createScalarZExtOrTrunc(
+      VPValue *Trunc = VPBuilder(&R).createScalarZExtOrTrunc(
           AVL, Type::getInt32Ty(Plan.getContext()), AVLSCEV->getType(),
           R.getDebugLoc());
       R.getVPSingleValue()->replaceAllUsesWith(Trunc);