Skip to content

Commit

Permalink
[2/11][POC][Clang][RISCV] Define RVV tuple types
Browse files Browse the repository at this point in the history
For the cover letter of this patch-set, please checkout D146872.

Depends on D146872.

This is the 2nd patch of the patch-set. This patch originates from
D97264. This patch further allows local variable declaration and
function parameter passing by adjustment in clang lowering.

Test cases are provided to demonstrate the LLVM IR generated.

Note: This patch is currently only a proof-of-concept with only a
single RVV tuple type declared here, the rest will be added when
the concept of this patch-set is accepted.

Authored-by: eop Chen <eop.chen@sifive.com>
Co-Authored-by: Hsiangkai Wang <kai.wang@sifive.com>

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D146873
  • Loading branch information
eopXD committed May 22, 2023
1 parent 3bc1ea5 commit 5e92298
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 26 deletions.
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/RISCVVTypes.def
Expand Up @@ -144,6 +144,10 @@ RVV_PREDICATE_TYPE("__rvv_bool16_t", RvvBool16, RvvBool16Ty, 4)
RVV_PREDICATE_TYPE("__rvv_bool32_t", RvvBool32, RvvBool32Ty, 2)
RVV_PREDICATE_TYPE("__rvv_bool64_t", RvvBool64, RvvBool64Ty, 1)

//===- Tuple vector types -------------------------------------------------===//

RVV_VECTOR_TYPE_INT("__rvv_int32m1x2_t", RvvInt32m1x2, RvvInt32m1x2Ty, 2, 32, 2, true)

#undef RVV_VECTOR_TYPE_FLOAT
#undef RVV_VECTOR_TYPE_INT
#undef RVV_VECTOR_TYPE
Expand Down
59 changes: 40 additions & 19 deletions clang/lib/CodeGen/CGCall.cpp
Expand Up @@ -3132,30 +3132,51 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
STy->getNumElements() > 1) {
uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
llvm::Type *DstTy = Ptr.getElementType();
uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
llvm::TypeSize PtrElementSize =
CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType());
if (StructSize.isScalable()) {
assert(STy->containsHomogeneousScalableVectorTypes() &&
"ABI only supports structure with homogeneous scalable vector "
"type");
assert(StructSize == PtrElementSize &&
"Only allow non-fractional movement of structure with"
"homogeneous scalable vector type");
assert(STy->getNumElements() == NumIRArgs);

llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
auto *AI = Fn->getArg(FirstIRArg + i);
AI->setName(Arg->getName() + ".coerce" + Twine(i));
LoadedStructValue =
Builder.CreateInsertValue(LoadedStructValue, AI, i);
}

Address AddrToStoreInto = Address::invalid();
if (SrcSize <= DstSize) {
AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
Builder.CreateStore(LoadedStructValue, Ptr);
} else {
AddrToStoreInto =
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
}
uint64_t SrcSize = StructSize.getFixedValue();
uint64_t DstSize = PtrElementSize.getFixedValue();

Address AddrToStoreInto = Address::invalid();
if (SrcSize <= DstSize) {
AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
} else {
AddrToStoreInto =
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
}

assert(STy->getNumElements() == NumIRArgs);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
auto AI = Fn->getArg(FirstIRArg + i);
AI->setName(Arg->getName() + ".coerce" + Twine(i));
Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
Builder.CreateStore(AI, EltPtr);
}
assert(STy->getNumElements() == NumIRArgs);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
auto AI = Fn->getArg(FirstIRArg + i);
AI->setName(Arg->getName() + ".coerce" + Twine(i));
Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
Builder.CreateStore(AI, EltPtr);
}

if (SrcSize > DstSize) {
Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
if (SrcSize > DstSize) {
Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
}
}

} else {
// Simple case, just do a coerced store of the argument into the alloca.
assert(NumIRArgs == 1);
Expand Down
23 changes: 16 additions & 7 deletions clang/lib/CodeGen/CodeGenTypes.cpp
Expand Up @@ -630,13 +630,22 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
#include "clang/Basic/PPCTypes.def"
#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/RISCVVTypes.def"
{
ASTContext::BuiltinVectorTypeInfo Info =
Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
Info.EC.getKnownMinValue() *
Info.NumVectors);
}
{
ASTContext::BuiltinVectorTypeInfo Info =
Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
// Tuple types are expressed as aggregregate types of the same scalable
// vector type (e.g. vint32m1x2_t is two vint32m1_t, which is {<vscale x
// 2 x i32>, <vscale x 2 x i32>}).
if (Info.NumVectors != 1) {
llvm::Type *EltTy = llvm::ScalableVectorType::get(
ConvertType(Info.ElementType), Info.EC.getKnownMinValue());
llvm::SmallVector<llvm::Type *, 4> EltTys(Info.NumVectors, EltTy);
return llvm::StructType::get(getLLVMContext(), EltTys);
}
return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
Info.EC.getKnownMinValue() *
Info.NumVectors);
}
#define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS) \
case BuiltinType::Id: { \
if (BuiltinType::Id == BuiltinType::WasmExternRef) \
Expand Down
@@ -0,0 +1,90 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -O0 \
// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=O0
// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s --check-prefix=AFTER_MEM2REG


#include <riscv_vector.h>

// Declare local variable
// O0-LABEL: define dso_local void @foo
// O0-SAME: () #[[ATTR0:[0-9]+]] {
// O0-NEXT: entry:
// O0-NEXT: [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: ret void
//
// AFTER_MEM2REG-LABEL: define dso_local void @foo
// AFTER_MEM2REG-SAME: () #[[ATTR0:[0-9]+]] {
// AFTER_MEM2REG-NEXT: entry:
// AFTER_MEM2REG-NEXT: ret void
//
void foo() {
__rvv_int32m1x2_t v_tuple;
}

// Declare local variable and return
// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @bar
// O0-SAME: () #[[ATTR0]] {
// O0-NEXT: entry:
// O0-NEXT: [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: [[TMP0:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
// O0-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]]
//
// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @bar
// AFTER_MEM2REG-SAME: () #[[ATTR0]] {
// AFTER_MEM2REG-NEXT: entry:
// AFTER_MEM2REG-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } undef
//
__rvv_int32m1x2_t bar() {
__rvv_int32m1x2_t v_tuple;
return v_tuple;
}

// Pass as function parameter
// O0-LABEL: define dso_local void @baz
// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
// O0-NEXT: entry:
// O0-NEXT: [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
// O0-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
// O0-NEXT: [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
// O0-NEXT: ret void
//
// AFTER_MEM2REG-LABEL: define dso_local void @baz
// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
// AFTER_MEM2REG-NEXT: entry:
// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
// AFTER_MEM2REG-NEXT: ret void
//
void baz(__rvv_int32m1x2_t v_tuple) {
}

// Pass as function parameter and return
// O0-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux
// O0-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
// O0-NEXT: entry:
// O0-NEXT: [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
// O0-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
// O0-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], ptr [[V_TUPLE]], align 4
// O0-NEXT: [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
// O0-NEXT: store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
// O0-NEXT: [[TMP2:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE_ADDR]], align 4
// O0-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]]
//
// AFTER_MEM2REG-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux
// AFTER_MEM2REG-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
// AFTER_MEM2REG-NEXT: entry:
// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
// AFTER_MEM2REG-NEXT: ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]]
//
__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) {
return v_tuple;
}
3 changes: 3 additions & 0 deletions clang/test/Sema/riscv-types.c
Expand Up @@ -133,6 +133,9 @@ void bar(void) {

// CHECK: __rvv_int8mf2_t x43;
__rvv_int8mf2_t x43;

// CHECK: __rvv_int32m1x2_t x44;
__rvv_int32m1x2_t x44;
}

typedef __rvv_bool4_t vbool4_t;
Expand Down

0 comments on commit 5e92298

Please sign in to comment.