Skip to content

Commit

Permalink
[Clang][AArch64][SME] Add vector read/write (mova) intrinsics
Browse files Browse the repository at this point in the history
This patch adds support for the following SME ACLE intrinsics (as defined
in https://arm-software.github.io/acle/main/acle.html):

  - svread_hor_za8[_s8]_m    // also for u8
  - svread_hor_za16[_s16]_m  // also for u16, f16, bf16
  - svread_hor_za32[_s32]_m  // also for u32, f32
  - svread_hor_za64[_s64]_m  // also for u64, f64
  - svread_hor_za128[_s8]_m  // also for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
  - svread_ver_za8[_s8]_m    // also for u8
  - svread_ver_za16[_s16]_m  // also for u16, f16, bf16
  - svread_ver_za32[_s32]_m  // also for u32, f32
  - svread_ver_za64[_s64]_m  // also for u64, f64
  - svread_ver_za128[_s8]_m  // also for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
  - svwrite_hor_za8[_s8]_m   // also for u8
  - svwrite_hor_za16[_s16]_m // also for u16, f16, bf16
  - svwrite_hor_za32[_s32]_m // also for u32, f32
  - svwrite_hor_za64[_s64]_m // also for u64, f64
  - svwrite_hor_za128[_s8]_m // also for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
  - svwrite_ver_za8[_s8]_m   // also for u8
  - svwrite_ver_za16[_s16]_m // also for u16, f16, bf16
  - svwrite_ver_za32[_s32]_m // also for u32, f32
  - svwrite_ver_za64[_s64]_m // also for u64, f64
  - svwrite_ver_za128[_s8]_m // also for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64

Co-authored-by: Sagar Kulkarni <sagar.kulkarni1@huawei.com>

Reviewed By: sdesmalen, kmclaughlin

Differential Revision: https://reviews.llvm.org/D128648
  • Loading branch information
bryanpkc committed Jul 20, 2023
1 parent 697f605 commit 6dc94c5
Show file tree
Hide file tree
Showing 9 changed files with 2,318 additions and 14 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/TargetBuiltins.h
Expand Up @@ -307,6 +307,8 @@ namespace clang {
bool isTupleCreate() const { return Flags & IsTupleCreate; }
bool isTupleGet() const { return Flags & IsTupleGet; }
bool isTupleSet() const { return Flags & IsTupleSet; }
bool isReadZA() const { return Flags & IsReadZA; }
bool isWriteZA() const { return Flags & IsWriteZA; }

uint64_t getBits() const { return Flags; }
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
Expand Down
42 changes: 42 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Expand Up @@ -72,3 +72,45 @@ defm SVST1_ZA16 : ZAStore<"za16", "s", "aarch64_sme_st1h", [ImmCheck<0, ImmCheck
defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;

////////////////////////////////////////////////////////////////////////////////
// Read horizontal/vertical ZA slices

multiclass ZARead<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> {
let TargetGuard = "sme" in {
def NAME # _H : SInst<"svread_hor_" # n_suffix # "[_{d}]", "ddPimi", t,
MergeOp1, i_prefix # "_horiz",
[IsReadZA, IsStreaming, IsSharedZA, IsPreservesZA], ch>;

def NAME # _V : SInst<"svread_ver_" # n_suffix # "[_{d}]", "ddPimi", t,
MergeOp1, i_prefix # "_vert",
[IsReadZA, IsStreaming, IsSharedZA, IsPreservesZA], ch>;
}
}

defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>, ImmCheck<4, ImmCheck0_15>]>;
defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>, ImmCheck<4, ImmCheck0_7>]>;
defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>;
defm SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>;
defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>, ImmCheck<4, ImmCheck0_0>]>;

////////////////////////////////////////////////////////////////////////////////
// Write horizontal/vertical ZA slices

multiclass ZAWrite<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> {
let TargetGuard = "sme" in {
def NAME # _H : SInst<"svwrite_hor_" # n_suffix # "[_{d}]", "vimiPd", t,
MergeOp1, i_prefix # "_horiz",
[IsWriteZA, IsStreaming, IsSharedZA], ch>;

def NAME # _V : SInst<"svwrite_ver_" # n_suffix # "[_{d}]", "vimiPd", t,
MergeOp1, i_prefix # "_vert",
[IsWriteZA, IsStreaming, IsSharedZA], ch>;
}
}

defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>;
defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>;
defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>;
defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>;
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/arm_sve_sme_incl.td
Expand Up @@ -220,6 +220,8 @@ def IsStreaming : FlagType<0x2000000000>;
def IsStreamingCompatible : FlagType<0x4000000000>;
def IsSharedZA : FlagType<0x8000000000>;
def IsPreservesZA : FlagType<0x10000000000>;
def IsReadZA : FlagType<0x20000000000>;
def IsWriteZA : FlagType<0x40000000000>;

// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
class ImmCheckType<int val> {
Expand Down
19 changes: 19 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -9478,6 +9478,23 @@ Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags,
return Builder.CreateCall(F, NewOps);
}

Value *CodeGenFunction::EmitSMEReadWrite(SVETypeFlags TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
auto *VecTy = getSVEType(TypeFlags);
Function *F = CGM.getIntrinsic(IntID, VecTy);
if (TypeFlags.isReadZA()) {
Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
Ops[3] = EmitTileslice(Ops[4], Ops[3]);
Ops.erase(&Ops[4]);
} else if (TypeFlags.isWriteZA()) {
Ops[1] = EmitTileslice(Ops[2], Ops[1]);
Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy);
Ops.erase(&Ops[3]);
}
return Builder.CreateCall(F, Ops);
}

// Limit the usage of scalable llvm IR generated by the ACLE by using the
// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
Expand Down Expand Up @@ -9936,6 +9953,8 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
SVETypeFlags TypeFlags(Builtin->TypeModifier);
if (TypeFlags.isLoad() || TypeFlags.isStore())
return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);

/// Should not happen
return nullptr;
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Expand Up @@ -4280,6 +4280,9 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSMEReadWrite(SVETypeFlags TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
Expand Down

0 comments on commit 6dc94c5

Please sign in to comment.