[2/11][POC][Clang][RISCV] Define RVV tuple types

For the cover letter of this patch-set, please checkout D146872. Depends on D146872. This is the 2nd patch of the patch-set. This patch originates from D97264. This patch further allows local variable declaration and function parameter passing by adjustment in clang lowering. Test cases are provided to demonstrate the LLVM IR generated. Note: This patch is currently only a proof-of-concept with only a single RVV tuple type declared here, the rest will be added when the concept of this patch-set is accepted. Authored-by: eop Chen <eop.chen@sifive.com> Co-Authored-by: Hsiangkai Wang <kai.wang@sifive.com> Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D146873
llvm · May 22, 2023 · 5e92298 · 5e92298
1 parent 3bc1ea5
commit 5e92298
Show file tree

Hide file tree

Showing 5 changed files with 153 additions and 26 deletions.
diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def
@@ -144,6 +144,10 @@ RVV_PREDICATE_TYPE("__rvv_bool16_t", RvvBool16, RvvBool16Ty, 4)
 RVV_PREDICATE_TYPE("__rvv_bool32_t", RvvBool32, RvvBool32Ty, 2)
 RVV_PREDICATE_TYPE("__rvv_bool64_t", RvvBool64, RvvBool64Ty, 1)
 
+//===- Tuple vector types -------------------------------------------------===//
+
+RVV_VECTOR_TYPE_INT("__rvv_int32m1x2_t", RvvInt32m1x2, RvvInt32m1x2Ty, 2,  32, 2, true)
+
 #undef RVV_VECTOR_TYPE_FLOAT
 #undef RVV_VECTOR_TYPE_INT
 #undef RVV_VECTOR_TYPE

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
@@ -3132,30 +3132,51 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
       if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
           STy->getNumElements() > 1) {
-        uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
-        llvm::Type *DstTy = Ptr.getElementType();
-        uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
+        llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
+        llvm::TypeSize PtrElementSize =
+            CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType());
+        if (StructSize.isScalable()) {
+          assert(STy->containsHomogeneousScalableVectorTypes() &&
+                 "ABI only supports structure with homogeneous scalable vector "
+                 "type");
+          assert(StructSize == PtrElementSize &&
+                 "Only allow non-fractional movement of structure with"
+                 "homogeneous scalable vector type");
+          assert(STy->getNumElements() == NumIRArgs);
+
+          llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy);
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            auto *AI = Fn->getArg(FirstIRArg + i);
+            AI->setName(Arg->getName() + ".coerce" + Twine(i));
+            LoadedStructValue =
+                Builder.CreateInsertValue(LoadedStructValue, AI, i);
+          }
 
-        Address AddrToStoreInto = Address::invalid();
-        if (SrcSize <= DstSize) {
-          AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
+          Builder.CreateStore(LoadedStructValue, Ptr);
         } else {
-          AddrToStoreInto =
-            CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
-        }
+          uint64_t SrcSize = StructSize.getFixedValue();
+          uint64_t DstSize = PtrElementSize.getFixedValue();
+
+          Address AddrToStoreInto = Address::invalid();
+          if (SrcSize <= DstSize) {
+            AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
+          } else {
+            AddrToStoreInto =
+                CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
+          }
 
-        assert(STy->getNumElements() == NumIRArgs);
-        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          auto AI = Fn->getArg(FirstIRArg + i);
-          AI->setName(Arg->getName() + ".coerce" + Twine(i));
-          Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
-          Builder.CreateStore(AI, EltPtr);
-        }
+          assert(STy->getNumElements() == NumIRArgs);
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            auto AI = Fn->getArg(FirstIRArg + i);
+            AI->setName(Arg->getName() + ".coerce" + Twine(i));
+            Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
+            Builder.CreateStore(AI, EltPtr);
+          }
 
-        if (SrcSize > DstSize) {
-          Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+          if (SrcSize > DstSize) {
+            Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+          }
         }
-
       } else {
         // Simple case, just do a coerced store of the argument into the alloca.
         assert(NumIRArgs == 1);

diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -630,13 +630,22 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
 #include "clang/Basic/PPCTypes.def"
 #define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
 #include "clang/Basic/RISCVVTypes.def"
-    {
-      ASTContext::BuiltinVectorTypeInfo Info =
-          Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
-      return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
-                                           Info.EC.getKnownMinValue() *
-                                           Info.NumVectors);
-    }
+      {
+        ASTContext::BuiltinVectorTypeInfo Info =
+            Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
+        // Tuple types are expressed as aggregregate types of the same scalable
+        // vector type (e.g. vint32m1x2_t is two vint32m1_t, which is {<vscale x
+        // 2 x i32>, <vscale x 2 x i32>}).
+        if (Info.NumVectors != 1) {
+          llvm::Type *EltTy = llvm::ScalableVectorType::get(
+              ConvertType(Info.ElementType), Info.EC.getKnownMinValue());
+          llvm::SmallVector<llvm::Type *, 4> EltTys(Info.NumVectors, EltTy);
+          return llvm::StructType::get(getLLVMContext(), EltTys);
+        }
+        return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
+                                             Info.EC.getKnownMinValue() *
+                                                 Info.NumVectors);
+      }
 #define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS)                  \
   case BuiltinType::Id: {                                                      \
     if (BuiltinType::Id == BuiltinType::WasmExternRef)                         \

diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c
@@ -0,0 +1,90 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -O0 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefix=O0
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefix=AFTER_MEM2REG
+
+
+#include <riscv_vector.h>
+
+// Declare local variable
+// O0-LABEL: define dso_local void @foo
+// O0-SAME: () #[[ATTR0:[0-9]+]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    ret void
+//
+// AFTER_MEM2REG-LABEL: define dso_local void @foo
+// AFTER_MEM2REG-SAME: () #[[ATTR0:[0-9]+]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    ret void
+//
+void foo() {
+  __rvv_int32m1x2_t v_tuple;
+}
+
+// Declare local variable and return
+// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @bar
+// O0-SAME: () #[[ATTR0]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[TMP0:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// O0-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]]
+//
+// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @bar
+// AFTER_MEM2REG-SAME: () #[[ATTR0]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } undef
+//
+__rvv_int32m1x2_t bar() {
+  __rvv_int32m1x2_t v_tuple;
+  return v_tuple;
+}
+
+// Pass as function parameter
+// O0-LABEL: define dso_local void @baz
+// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// O0-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
+// O0-NEXT:    [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT:    ret void
+//
+// AFTER_MEM2REG-LABEL: define dso_local void @baz
+// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// AFTER_MEM2REG-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// AFTER_MEM2REG-NEXT:    ret void
+//
+void baz(__rvv_int32m1x2_t v_tuple) {
+}
+
+// Pass as function parameter and return
+// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux
+// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// O0-NEXT:  entry:
+// O0-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// O0-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// O0-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
+// O0-NEXT:    [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// O0-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT:    [[TMP2:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE_ADDR]], align 4
+// O0-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]]
+//
+// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux
+// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// AFTER_MEM2REG-NEXT:  entry:
+// AFTER_MEM2REG-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// AFTER_MEM2REG-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// AFTER_MEM2REG-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]]
+//
+__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) {
+  return v_tuple;
+}
diff --git a/clang/test/Sema/riscv-types.c b/clang/test/Sema/riscv-types.c
@@ -133,6 +133,9 @@ void bar(void) {
 
   // CHECK: __rvv_int8mf2_t x43;
   __rvv_int8mf2_t x43;
+
+  // CHECK: __rvv_int32m1x2_t x44;
+  __rvv_int32m1x2_t x44;
 }
 
 typedef __rvv_bool4_t vbool4_t;