[RISCV] Support LMUL!=1 for __attribute__((riscv_rvv_vector_bits(N)))

The first patch supported only LMUL=1 types. This patch supports LMUL!=1. LMUL is length multiplier that allows multiple vector registers to be treated as one large register or a fraction of a single vector register. Supported values for LMUL are 1/8, 1/4, 1/2, 1, 2, 4, and 8. An LMUL=2 type will be twice as large as an LMUL=1 type. An LMUL=1/2 type will be half the size as an LMUL=1 type. Type name with "m2" is LMUL=2, "m4" is LMUL=4. Type name with "mf2" is LMUL=1/2, "mf4" is LMUL=1/4. For the LMUL!=1 types the user will need to scale __riscv_v_fixed_vlen by the LMUL before passing to the attribute. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D150926
llvm · Jun 8, 2023 · 72d23a2 · 72d23a2
1 parent 9c4c67a
commit 72d23a2
Show file tree

Hide file tree

Showing 9 changed files with 1,680 additions and 27 deletions.
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
@@ -2340,11 +2340,14 @@ unsupported for sizeless types.
 
 The attribute can be attached to a single RVV vector (such as ``vint8m1_t``).
 The attribute will be rejected unless
-``N==__riscv_v_fixed_vlen``, the implementation defined feature macro that
+``N==(__riscv_v_fixed_vlen*LMUL)``, the implementation defined feature macro that
 is enabled under the ``-mrvv-vector-bits`` flag. ``__riscv_v_fixed_vlen`` can
 only be a power of 2 between 64 and 65536.
 
-Only ``*m1_t`` (LMUL=1) types are supported at this time.
+For types where LMUL!=1, ``__riscv_v_fixed_vlen`` needs to be scaled by the LMUL
+of the type before passing to the attribute.
+
+``vbool*_t`` types are not supported at this time.
 }];
 }
 

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3078,8 +3078,8 @@ def err_attribute_riscv_rvv_bits_unsupported : Error<
   "%0 is only supported when '-mrvv-vector-bits=<bits>' is specified with a "
   "value of \"zvl\" or a power 2 in the range [64,65536]">;
 def err_attribute_bad_rvv_vector_size : Error<
-  "invalid RVV vector size '%0', must match value set by "
-  "'-mrvv-vector-bits' ('%1')">;
+  "invalid RVV vector size '%0', expected size is '%1' based on LMUL of type "
+  "and '-mrvv-vector-bits'">;
 def err_attribute_invalid_rvv_type : Error<
   "%0 attribute applied to non-RVV type %1">;
 def err_attribute_requires_positive_integer : Error<

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
@@ -9579,11 +9579,8 @@ bool ASTContext::areCompatibleRVVTypes(QualType FirstType,
   auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
     if (const auto *BT = FirstType->getAs<BuiltinType>()) {
       if (const auto *VT = SecondType->getAs<VectorType>()) {
-        if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
-          return FirstType->isRVVVLSBuiltinType() &&
-                 VT->getElementType().getCanonicalType() ==
-                     FirstType->getRVVEltType(*this);
-        if (VT->getVectorKind() == VectorType::GenericVector)
+        if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector ||
+            VT->getVectorKind() == VectorType::GenericVector)
           return FirstType->isRVVVLSBuiltinType() &&
                  getTypeSize(SecondType) == getRVVTypeSize(*this, BT) &&
                  hasSameType(VT->getElementType(),

diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
@@ -2448,10 +2448,9 @@ QualType Type::getSveEltType(const ASTContext &Ctx) const {
 bool Type::isRVVVLSBuiltinType() const {
   if (const BuiltinType *BT = getAs<BuiltinType>()) {
     switch (BT->getKind()) {
-    // FIXME: Support more than LMUL 1.
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, IsFP) \
     case BuiltinType::Id: \
-      return NF == 1 && (NumEls * ElBits) == llvm::RISCV::RVVBitsPerBlock;
+      return NF == 1;
 #include "clang/Basic/RISCVVTypes.def"
     default:
       return false;

diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
@@ -40,7 +40,6 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/TargetParser/RISCVTargetParser.h"
 #include <bitset>
 #include <optional>
 
@@ -8345,9 +8344,10 @@ static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType,
   unsigned MinElts = Info.EC.getKnownMinValue();
 
   // The attribute vector size must match -mrvv-vector-bits.
-  if (VecSize != VScale->first * MinElts * EltSize) {
+  unsigned ExpectedSize = VScale->first * MinElts * EltSize;
+  if (VecSize != ExpectedSize) {
     S.Diag(Attr.getLoc(), diag::err_attribute_bad_rvv_vector_size)
-        << VecSize << VScale->first * llvm::RISCV::RVVBitsPerBlock;
+        << VecSize << ExpectedSize;
     Attr.setInvalid();
     return;
   }

diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c
@@ -16,9 +16,22 @@ typedef __rvv_uint64m1_t vuint64m1_t;
 typedef __rvv_float32m1_t vfloat32m1_t;
 typedef __rvv_float64m1_t vfloat64m1_t;
 
+typedef __rvv_int8m2_t vint8m2_t;
+typedef __rvv_uint8m2_t vuint8m2_t;
+typedef __rvv_int16m2_t vint16m2_t;
+typedef __rvv_uint16m2_t vuint16m2_t;
+typedef __rvv_int32m2_t vint32m2_t;
+typedef __rvv_uint32m2_t vuint32m2_t;
+typedef __rvv_int64m2_t vint64m2_t;
+typedef __rvv_uint64m2_t vuint64m2_t;
+typedef __rvv_float32m2_t vfloat32m2_t;
+typedef __rvv_float64m2_t vfloat64m2_t;
+
 typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
+typedef vint32m2_t fixed_int32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2)));
 
 fixed_int32m1_t global_vec;
+fixed_int32m2_t global_vec_m2;
 
 // CHECK-LABEL: @test_ptr_to_global(
 // CHECK-NEXT:  entry:
@@ -75,3 +88,59 @@ fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) {
 fixed_int32m1_t test_cast(vint32m1_t vec) {
   return __riscv_vadd(global_vec, vec, __riscv_v_fixed_vlen/32);
 }
+
+// CHECK-LABEL: @test_ptr_to_global_m2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 8
+// CHECK-NEXT:    [[GLOBAL_VEC_PTR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr @global_vec_m2, ptr [[GLOBAL_VEC_PTR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_VEC_PTR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[TMP0]], align 8
+// CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v16i32(<vscale x 2 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m2_t test_ptr_to_global_m2() {
+  fixed_int32m2_t *global_vec_ptr;
+  global_vec_ptr = &global_vec_m2;
+  return *global_vec_ptr;
+}
+
+//
+// Test casting pointer from fixed-length array to scalable vector.
+// CHECK-LABEL: @array_arg_m2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 8
+// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ARR:%.*]], ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds <16 x i32>, ptr [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[ARRAYIDX]], align 8
+// CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v16i32(<vscale x 2 x i32> undef, <16 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m2_t array_arg_m2(fixed_int32m2_t arr[]) {
+  return arr[0];
+}
+
+// CHECK-LABEL: @test_cast_m2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <16 x i32>, align 8
+// CHECK-NEXT:    [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 4
+// CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], ptr [[VEC_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr @global_vec_m2, align 8
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 4 x i32>, ptr [[VEC_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> poison, <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP1]], i64 16)
+// CHECK-NEXT:    [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    store <16 x i32> [[CASTFIXEDSVE]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    [[CASTSCALABLESVE1:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v16i32(<vscale x 2 x i32> undef, <16 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE1]]
+//
+fixed_int32m2_t test_cast_m2(vint32m2_t vec) {
+  return __riscv_vadd(global_vec_m2, vec, __riscv_v_fixed_vlen/16);
+}