[SVE] Remove usage of getMaxVScale for AArch64, in favour of IR Attri…

…bute Removed AArch64 usage of the getMaxVScale interface, replacing it with the vscale_range(min, max) IR Attribute. Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D106277
llvm · Aug 17, 2021 · ef198cd · ef198cd
1 parent 9de882f
commit ef198cd
Show file tree

Hide file tree

Showing 20 changed files with 98 additions and 56 deletions.
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
@@ -871,6 +871,11 @@ class TargetInfo : public virtual TransferrableTargetInfo,
   /// across the current set of primary and secondary targets.
   virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;
 
+  /// Returns target-specific min and max values VScale_Range.
+  virtual Optional<std::pair<unsigned, unsigned>>
+  getVScaleRange(const LangOptions &LangOpts) const {
+    return None;
+  }
   /// The __builtin_clz* and __builtin_ctz* built-in
   /// functions are specified to have undefined results for zero inputs, but
   /// on targets that support these operations in a way that provides

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
@@ -424,6 +424,17 @@ ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
                                              Builtin::FirstTSBuiltin);
 }
 
+Optional<std::pair<unsigned, unsigned>>
+AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+  if (LangOpts.ArmSveVectorBits) {
+    unsigned VScale = LangOpts.ArmSveVectorBits / 128;
+    return std::pair<unsigned, unsigned>(VScale, VScale);
+  }
+  if (hasFeature("sve"))
+    return std::pair<unsigned, unsigned>(0, 16);
+  return None;
+}
+
 bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
   return Feature == "aarch64" || Feature == "arm64" || Feature == "arm" ||
          (Feature == "neon" && (FPU & NeonMode)) ||

diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
@@ -96,6 +96,9 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
 
   ArrayRef<Builtin::Info> getTargetBuiltins() const override;
 
+  Optional<std::pair<unsigned, unsigned>>
+  getVScaleRange(const LangOptions &LangOpts) const override;
+
   bool hasFeature(StringRef Feature) const override;
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override;

diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -484,11 +484,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
   //    function.
   CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
 
-  // Add vscale attribute if appropriate.
-  if (getLangOpts().ArmSveVectorBits) {
-    unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
-    CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
-                                                             VScale, VScale));
+  // Add vscale_range attribute if appropriate.
+  Optional<std::pair<unsigned, unsigned>> VScaleRange =
+      getContext().getTargetInfo().getVScaleRange(getLangOpts());
+  if (VScaleRange) {
+    CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
+        getLLVMContext(), VScaleRange.getValue().first,
+        VScaleRange.getValue().second));
   }
 
   // If we generated an unreachable return block, delete it now.

diff --git a/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
@@ -3,10 +3,13 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=2048
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=128 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=256 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
 
 // CHECK-LABEL: @func() #0
 // CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
-// CHECK-NONE-NOT: vscale_range
+// CHECK-NONE: attributes #0 = { {{.*}} vscale_range(0,16) {{.*}} }
 void func() {}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1589,7 +1589,7 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
   ElementCount LegalVF = LT.second.getVectorElementCount();
   InstructionCost MemOpCost =
       getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
-  return LT.first * MemOpCost * getMaxNumElements(LegalVF);
+  return LT.first * MemOpCost * getMaxNumElements(LegalVF, I->getFunction());
 }
 
 bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -125,22 +125,25 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
     return ST->getMinVectorRegisterBitWidth();
   }
 
-  Optional<unsigned> getMaxVScale() const {
-    if (ST->hasSVE())
-      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
-    return BaseT::getMaxVScale();
-  }
 
   /// Try to return an estimate cost factor that can be used as a multiplier
   /// when scalarizing an operation for a vector with ElementCount \p VF.
   /// For scalable vectors this currently takes the most pessimistic view based
   /// upon the maximum possible value for vscale.
-  unsigned getMaxNumElements(ElementCount VF) const {
+  unsigned getMaxNumElements(ElementCount VF,
+                             const Function *F = nullptr) const {
     if (!VF.isScalable())
       return VF.getFixedValue();
-    Optional<unsigned> MaxNumVScale = getMaxVScale();
-    assert(MaxNumVScale && "Expected valid max vscale value");
-    return *MaxNumVScale * VF.getKnownMinValue();
+
+    unsigned MaxNumVScale = 16;
+    if (F && F->hasFnAttribute(Attribute::VScaleRange)) {
+      unsigned VScaleMax =
+          F->getFnAttribute(Attribute::VScaleRange).getVScaleRangeArgs().second;
+      if (VScaleMax > 0)
+        MaxNumVScale = VScaleMax;
+    }
+
+    return MaxNumVScale * VF.getKnownMinValue();
   }
 
   unsigned getMaxInterleaveFactor(unsigned VF);

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5636,6 +5636,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
 
   // Limit MaxScalableVF by the maximum safe dependence distance.
   Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+  if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+    unsigned VScaleMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
+                             .getVScaleRangeArgs()
+                             .second;
+    if (VScaleMax > 0)
+      MaxVScale = VScaleMax;
+  }
   MaxScalableVF = ElementCount::getScalable(
       MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
   if (!MaxScalableVF)

diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
@@ -2,7 +2,7 @@
 
 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s | FileCheck %s
 
-define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
 ; CHECK-LABEL: 'masked_gathers'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction:   %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction:   %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32

diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
@@ -2,7 +2,7 @@
 
 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s | FileCheck %s
 
-define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
 ; CHECK-LABEL: 'masked_scatters'
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -18,7 +18,7 @@
 ;   return a;
 ; }
 ;
-define i32 @PR33613(double* %b, double %j, i32 %d) {
+define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
 ; CHECK-VF4UF2-LABEL: @PR33613
 ; CHECK-VF4UF2: vector.body
 ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
@@ -66,7 +66,7 @@ for.body:
 ; }
 ;
 ; Check that the sext sank after the load in the vector loop.
-define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
 ; CHECK-VF4UF1-LABEL: @PR34711
 ; CHECK-VF4UF1: vector.body
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
@@ -100,5 +100,6 @@ for.end:
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -4,7 +4,7 @@
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true  -hints-allow-reordering=true  -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
 ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
 
-define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -49,7 +49,7 @@ for.end:
   ret float %add
 }
 
-define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict_unroll
 ; CHECK-ORDERED: vector.body:
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.*]], %vector.body ]
@@ -113,7 +113,7 @@ for.end:
   ret float %add
 }
 
-define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_strict_interleave
 ; CHECK-ORDERED: entry
 ; CHECK-ORDERED: %[[ARRAYIDX:.*]] = getelementptr inbounds float, float* %a, i64 1
@@ -206,7 +206,7 @@ for.end:
   ret void
 }
 
-define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_of_sum
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -268,7 +268,7 @@ for.end:                                 ; preds = %for.body, %entry
   ret float %res
 }
 
-define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_conditional
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -343,7 +343,7 @@ for.end:
 }
 
 ; Negative test - loop contains multiple fadds which we cannot safely reorder
-define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
+define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) #0 {
 ; CHECK-ORDERED-LABEL: @fadd_multiple
 ; CHECK-ORDERED-NOT: vector.body
 
@@ -390,6 +390,7 @@ for.end:                                         ; preds = %for.body
   ret float %rdx
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !0 = distinct !{!0, !3, !6, !8}
 !1 = distinct !{!1, !3, !7, !8}
 !2 = distinct !{!2, !4, !6, !8}

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
@@ -7,7 +7,7 @@
 ; Test that the MaxVF for the following loop, that has no dependence distances,
 ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
 ; (maximized bandwidth for i8 in the loop).
-define void @test0(i32* %a, i8* %b, i32* %c) {
+define void @test0(i32* %a, i8* %b, i32* %c) #0 {
 ; CHECK: LV: Checking a loop in "test0"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -40,7 +40,7 @@ exit:
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 64 elements, is calculated as (maxvscale = 16) * 4.
-define void @test1(i32* %a, i8* %b) {
+define void @test1(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test1"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -74,7 +74,7 @@ exit:
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 32 elements, is calculated as (maxvscale = 16) * 2.
-define void @test2(i32* %a, i8* %b) {
+define void @test2(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test2"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -108,7 +108,7 @@ exit:
 
 ; Test that the MaxVF for the following loop, with a dependence distance
 ; of 16 elements, is calculated as (maxvscale = 16) * 1.
-define void @test3(i32* %a, i8* %b) {
+define void @test3(i32* %a, i8* %b) #0 {
 ; CHECK: LV: Checking a loop in "test3"
 ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -142,7 +142,7 @@ exit:
 
 ; Test the fallback mechanism when scalable vectors are not feasible due
 ; to e.g. dependence distance.
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
 ; CHECK: LV: Checking a loop in "test4"
 ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -172,3 +172,5 @@ loop:
 exit:
   ret void
 }
+
+attributes #0 = { vscale_range(0, 16) }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -45,7 +45,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test1
 ; CHECK: <4 x i32>
-define void @test1(i32* %a, i32* %b) {
+define void @test1(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -90,7 +90,7 @@ exit:
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test2
 ; CHECK: <4 x i32>
-define void @test2(i32* %a, i32* %b) {
+define void @test2(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -138,7 +138,7 @@ exit:
 ; CHECK-DBG: LV: Using user VF vscale x 2.
 ; CHECK-LABEL: @test3
 ; CHECK: <vscale x 2 x i32>
-define void @test3(i32* %a, i32* %b) {
+define void @test3(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -190,7 +190,7 @@ exit:
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test4
 ; CHECK: <4 x i32>
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -238,7 +238,7 @@ exit:
 ; CHECK-DBG: LV: Using user VF vscale x 4
 ; CHECK-LABEL: @test5
 ; CHECK: <vscale x 4 x i32>
-define void @test5(i32* %a, i32* %b) {
+define void @test5(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -289,7 +289,7 @@ exit:
 ; CHECK-DBG: Selecting VF: vscale x 4.
 ; CHECK-LABEL: @test6
 ; CHECK: <vscale x 4 x i32>
-define void @test6(i32* %a, i32* %b) {
+define void @test6(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -322,7 +322,7 @@ exit:
 ; CHECK-NO-SVE-LABEL: @test_no_sve
 ; CHECK-NO-SVE: <4 x i32>
 ; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
-define void @test_no_sve(i32* %a, i32* %b) {
+define void @test_no_sve(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -356,7 +356,7 @@ exit:
 ; CHECK-DBG: LV: Selecting VF: 4.
 ; CHECK-LABEL: @test_no_max_vscale
 ; CHECK: <4 x i32>
-define void @test_no_max_vscale(i32* %a, i32* %b) {
+define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
 entry:
   br label %loop
 
@@ -378,6 +378,7 @@ exit:
   ret void
 }
 
+attributes #0 = { vscale_range(0, 16) }
 !21 = !{!21, !22, !23}
 !22 = !{!"llvm.loop.vectorize.width", i32 4}
 !23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}