Skip to content

Commit

Permalink
[AArch64][FMV] Support feature MOPS in Function Multi Versioning. (#7…
Browse files Browse the repository at this point in the history
…8788)

The patch adds support for FEAT_MOPS (Memory Copy and Memory Set
instructions) in Function Multi Versioning. The bits [19:16] of the
system register ID_AA64ISAR2_EL1 indicate whether FEAT_MOPS is
implemented in AArch64 state. This information is accessible via ELF
hwcaps.
  • Loading branch information
labrinea committed Jan 23, 2024
1 parent 8c41e3f commit 179ba12
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 17 deletions.
30 changes: 15 additions & 15 deletions clang/test/CodeGen/attr-target-version.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { re
inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; }
inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; }
inline int __attribute__((target_version("memtag3+rcpc3"))) fmv_inline(void) { return 11; }
inline int __attribute__((target_version("memtag3+rcpc3+mops"))) fmv_inline(void) { return 11; }
inline int __attribute__((target_version("default"))) fmv_inline(void) { return 3; }

__attribute__((target_version("ls64"))) int fmv_e(void);
Expand Down Expand Up @@ -272,36 +272,36 @@ int hoo(void) {
// CHECK-NEXT: ret ptr @fmv_inline._Mfp16Mfp16MfcmaMsme
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 893353197568
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 893353197568
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864726312827224064
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864726312827224064
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3Mmops
// CHECK: resolver_else2:
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 34359773184
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 34359773184
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 893353197568
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 893353197568
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
// CHECK: resolver_return3:
// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
// CHECK: resolver_else4:
// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 17246986240
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 17246986240
// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 34359773184
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 34359773184
// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]]
// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]]
// CHECK: resolver_return5:
// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
// CHECK: resolver_else6:
// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 288265560523800576
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 288265560523800576
// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17246986240
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17246986240
// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]]
// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]]
// CHECK: resolver_return7:
// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3
// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
// CHECK: resolver_else8:
// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19791209299968
Expand Down Expand Up @@ -609,7 +609,7 @@ int hoo(void) {
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3Mmops
// CHECK-SAME: () #[[ATTR23:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 11
Expand Down Expand Up @@ -768,7 +768,7 @@ int hoo(void) {
// CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" }
// CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" }
// CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+mte,+neon,+sve,+sve2,+sve2-sm4" }
// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mte,+rcpc,+rcpc3" }
// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mops,+mte,+rcpc,+rcpc3" }
// CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+sb" }
//.
// CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Sema/attr-target-clones-aarch64.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s

void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3"))) no_def(void);
void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3+mops"))) no_def(void);

// expected-warning@+1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
void __attribute__((target_clones("default+sha3"))) warn1(void);
Expand Down
1 change: 1 addition & 0 deletions clang/test/SemaCXX/attr-target-version.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ void __attribute__((target_version("vmull"))) wrong_tv(void);
void __attribute__((target_version("dotprod"))) no_def(void);
void __attribute__((target_version("rdm+fp"))) no_def(void);
void __attribute__((target_version("rcpc3"))) no_def(void);
void __attribute__((target_version("mops"))) no_def(void);

// expected-error@+1 {{no matching function for call to 'no_def'}}
void foo(void) { no_def(); }
Expand Down
1 change: 1 addition & 0 deletions compiler-rt/lib/builtins/cpu_model/aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ enum CPUFeatures {
FEAT_SME_I64,
FEAT_SME2,
FEAT_RCPC3,
FEAT_MOPS,
FEAT_MAX,
FEAT_EXT = 62, // Reserved to indicate presence of additional features field
// in __aarch64_cpu_features
Expand Down
2 changes: 2 additions & 0 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_SME_I64);
if (hwcap2 & HWCAP2_SME_F64F64)
setCPUFeature(FEAT_SME_F64);
if (hwcap2 & HWCAP2_MOPS)
setCPUFeature(FEAT_MOPS);
if (hwcap & HWCAP_CPUID) {
unsigned long ftr;
getCPUFeature(ID_AA64PFR1_EL1, ftr);
Expand Down
3 changes: 3 additions & 0 deletions compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,6 @@
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1ULL << 33)
#endif
#ifndef HWCAP2_MOPS
#define HWCAP2_MOPS (1ULL << 43)
#endif
3 changes: 2 additions & 1 deletion llvm/include/llvm/TargetParser/AArch64TargetParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ enum CPUFeatures {
FEAT_SME_I64,
FEAT_SME2,
FEAT_RCPC3,
FEAT_MOPS,
FEAT_MAX,
FEAT_EXT = 62,
FEAT_INIT
Expand Down Expand Up @@ -246,7 +247,7 @@ inline constexpr ExtensionInfo Extensions[] = {
{"memtag", AArch64::AEK_MTE, "+mte", "-mte", FEAT_MEMTAG, "", 440},
{"memtag2", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG2, "+mte", 450},
{"memtag3", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG3, "+mte", 460},
{"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_INIT, "", 0},
{"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_MOPS, "+mops", 650},
{"pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth", FEAT_INIT, "", 0},
{"pmull", AArch64::AEK_NONE, {}, {}, FEAT_PMULL, "+aes,+fp-armv8,+neon", 160},
{"pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon", FEAT_INIT, "", 0},
Expand Down

0 comments on commit 179ba12

Please sign in to comment.