Skip to content

Commit

Permalink
[X86] AMD Zen 4 Initial enablement
Browse files Browse the repository at this point in the history
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D139073
  • Loading branch information
ganeshgit committed Dec 17, 2022
1 parent 2242611 commit 1f057e3
Show file tree
Hide file tree
Showing 19 changed files with 199 additions and 5 deletions.
4 changes: 4 additions & 0 deletions clang/lib/Basic/Targets/X86.cpp
Expand Up @@ -607,6 +607,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_ZNVER3:
defineCPUMacros(Builder, "znver3");
break;
case CK_ZNVER4:
defineCPUMacros(Builder, "znver4");
break;
case CK_Geode:
defineCPUMacros(Builder, "geode");
break;
Expand Down Expand Up @@ -1440,6 +1443,7 @@ Optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_ZNVER1:
case CK_ZNVER2:
case CK_ZNVER3:
case CK_ZNVER4:
// Deprecated
case CK_x86_64:
case CK_x86_64_v2:
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGen/target-builtin-noerror.c
Expand Up @@ -135,4 +135,5 @@ void verifycpustrings(void) {
(void)__builtin_cpu_is("znver1");
(void)__builtin_cpu_is("znver2");
(void)__builtin_cpu_is("znver3");
(void)__builtin_cpu_is("znver4");
}
4 changes: 4 additions & 0 deletions clang/test/Driver/x86-march.c
Expand Up @@ -206,6 +206,10 @@
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver3 2>&1 \
// RUN: | FileCheck %s -check-prefix=znver3
// znver3: "-target-cpu" "znver3"
//
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver4 2>&1 \
// RUN: | FileCheck %s -check-prefix=znver4
// znver4: "-target-cpu" "znver4"

// RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64
// x86-64: "-target-cpu" "x86-64"
Expand Down
1 change: 1 addition & 0 deletions clang/test/Frontend/x86-target-cpu.c
Expand Up @@ -37,5 +37,6 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver1 -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver4 -verify %s
//
// expected-no-diagnostics
8 changes: 4 additions & 4 deletions clang/test/Misc/target-invalid-cpu-note.c
Expand Up @@ -13,19 +13,19 @@

// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'
// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}

// RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
// X86_64: error: unknown target CPU 'not-a-cpu'
// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}

// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
// TUNE_X86: error: unknown target CPU 'not-a-cpu'
// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}}
// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}

// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64
// TUNE_X86_64: error: unknown target CPU 'not-a-cpu'
// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}}
// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, sierraforest, grandridge, graniterapids, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}

// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
Expand Down
132 changes: 132 additions & 0 deletions clang/test/Preprocessor/predefined-arch-macros.c
Expand Up @@ -3832,6 +3832,138 @@
// CHECK_ZNVER3_M64: #define __znver3 1
// CHECK_ZNVER3_M64: #define __znver3__ 1

// znver4, 32-bit mode: macros expected from -march=znver4 -m32.
// The positive checks follow the order of the -dM output, so keep new
// entries in the same sorted position as their neighbours. This list is
// kept in step with the CHECK_ZNVER4_M64 list apart from the
// architecture-identity macros (__i386* here, __amd64*/__x86_64* there).
// RUN: %clang -march=znver4 -m32 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M32
// CHECK_ZNVER4_M32-NOT: #define __3dNOW_A__ 1
// CHECK_ZNVER4_M32-NOT: #define __3dNOW__ 1
// CHECK_ZNVER4_M32: #define __ADX__ 1
// CHECK_ZNVER4_M32: #define __AES__ 1
// CHECK_ZNVER4_M32: #define __AVX2__ 1
// CHECK_ZNVER4_M32: #define __AVX512BF16__ 1
// CHECK_ZNVER4_M32: #define __AVX512BITALG__ 1
// CHECK_ZNVER4_M32: #define __AVX512BW__ 1
// CHECK_ZNVER4_M32: #define __AVX512CD__ 1
// CHECK_ZNVER4_M32: #define __AVX512DQ__ 1
// CHECK_ZNVER4_M32: #define __AVX512F__ 1
// CHECK_ZNVER4_M32: #define __AVX512IFMA__ 1
// CHECK_ZNVER4_M32: #define __AVX512VBMI2__ 1
// CHECK_ZNVER4_M32: #define __AVX512VBMI__ 1
// CHECK_ZNVER4_M32: #define __AVX512VL__ 1
// CHECK_ZNVER4_M32: #define __AVX512VNNI__ 1
// CHECK_ZNVER4_M32: #define __AVX512VPOPCNTDQ__ 1
// CHECK_ZNVER4_M32: #define __AVX__ 1
// CHECK_ZNVER4_M32: #define __BMI2__ 1
// CHECK_ZNVER4_M32: #define __BMI__ 1
// CHECK_ZNVER4_M32: #define __CLFLUSHOPT__ 1
// CHECK_ZNVER4_M32: #define __CLWB__ 1
// CHECK_ZNVER4_M32: #define __CLZERO__ 1
// CHECK_ZNVER4_M32: #define __F16C__ 1
// CHECK_ZNVER4_M32-NOT: #define __FMA4__ 1
// CHECK_ZNVER4_M32: #define __FMA__ 1
// CHECK_ZNVER4_M32: #define __FSGSBASE__ 1
// CHECK_ZNVER4_M32: #define __GFNI__ 1
// CHECK_ZNVER4_M32: #define __LZCNT__ 1
// CHECK_ZNVER4_M32: #define __MMX__ 1
// CHECK_ZNVER4_M32: #define __PCLMUL__ 1
// CHECK_ZNVER4_M32: #define __PKU__ 1
// CHECK_ZNVER4_M32: #define __POPCNT__ 1
// CHECK_ZNVER4_M32: #define __PRFCHW__ 1
// CHECK_ZNVER4_M32: #define __RDPID__ 1
// CHECK_ZNVER4_M32: #define __RDPRU__ 1
// CHECK_ZNVER4_M32: #define __RDRND__ 1
// CHECK_ZNVER4_M32: #define __RDSEED__ 1
// CHECK_ZNVER4_M32: #define __SHA__ 1
// CHECK_ZNVER4_M32: #define __SSE2_MATH__ 1
// CHECK_ZNVER4_M32: #define __SSE2__ 1
// CHECK_ZNVER4_M32: #define __SSE3__ 1
// CHECK_ZNVER4_M32: #define __SSE4A__ 1
// CHECK_ZNVER4_M32: #define __SSE4_1__ 1
// CHECK_ZNVER4_M32: #define __SSE4_2__ 1
// CHECK_ZNVER4_M32: #define __SSE_MATH__ 1
// CHECK_ZNVER4_M32: #define __SSE__ 1
// CHECK_ZNVER4_M32: #define __SSSE3__ 1
// CHECK_ZNVER4_M32-NOT: #define __TBM__ 1
// CHECK_ZNVER4_M32: #define __VAES__ 1
// CHECK_ZNVER4_M32: #define __VPCLMULQDQ__ 1
// CHECK_ZNVER4_M32: #define __WBNOINVD__ 1
// CHECK_ZNVER4_M32-NOT: #define __XOP__ 1
// CHECK_ZNVER4_M32: #define __XSAVEC__ 1
// CHECK_ZNVER4_M32: #define __XSAVEOPT__ 1
// CHECK_ZNVER4_M32: #define __XSAVES__ 1
// CHECK_ZNVER4_M32: #define __XSAVE__ 1
// CHECK_ZNVER4_M32: #define __i386 1
// CHECK_ZNVER4_M32: #define __i386__ 1
// CHECK_ZNVER4_M32: #define __tune_znver4__ 1
// CHECK_ZNVER4_M32: #define __znver4 1
// CHECK_ZNVER4_M32: #define __znver4__ 1

// znver4, 64-bit mode: macros expected from -march=znver4 -m64.
// NOTE(review): -target names i386 while the checks below expect the
// __amd64*/__x86_64* macros, so -m64 is presumably what selects 64-bit
// mode here -- confirm against the driver before changing the RUN line.
// RUN: %clang -march=znver4 -m64 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M64
// CHECK_ZNVER4_M64-NOT: #define __3dNOW_A__ 1
// CHECK_ZNVER4_M64-NOT: #define __3dNOW__ 1
// CHECK_ZNVER4_M64: #define __ADX__ 1
// CHECK_ZNVER4_M64: #define __AES__ 1
// CHECK_ZNVER4_M64: #define __AVX2__ 1
// CHECK_ZNVER4_M64: #define __AVX512BF16__ 1
// CHECK_ZNVER4_M64: #define __AVX512BITALG__ 1
// CHECK_ZNVER4_M64: #define __AVX512BW__ 1
// CHECK_ZNVER4_M64: #define __AVX512CD__ 1
// CHECK_ZNVER4_M64: #define __AVX512DQ__ 1
// CHECK_ZNVER4_M64: #define __AVX512F__ 1
// CHECK_ZNVER4_M64: #define __AVX512IFMA__ 1
// CHECK_ZNVER4_M64: #define __AVX512VBMI2__ 1
// CHECK_ZNVER4_M64: #define __AVX512VBMI__ 1
// CHECK_ZNVER4_M64: #define __AVX512VL__ 1
// CHECK_ZNVER4_M64: #define __AVX512VNNI__ 1
// CHECK_ZNVER4_M64: #define __AVX512VPOPCNTDQ__ 1
// CHECK_ZNVER4_M64: #define __AVX__ 1
// CHECK_ZNVER4_M64: #define __BMI2__ 1
// CHECK_ZNVER4_M64: #define __BMI__ 1
// CHECK_ZNVER4_M64: #define __CLFLUSHOPT__ 1
// CHECK_ZNVER4_M64: #define __CLWB__ 1
// CHECK_ZNVER4_M64: #define __CLZERO__ 1
// CHECK_ZNVER4_M64: #define __F16C__ 1
// CHECK_ZNVER4_M64-NOT: #define __FMA4__ 1
// CHECK_ZNVER4_M64: #define __FMA__ 1
// CHECK_ZNVER4_M64: #define __FSGSBASE__ 1
// CHECK_ZNVER4_M64: #define __GFNI__ 1
// CHECK_ZNVER4_M64: #define __LZCNT__ 1
// CHECK_ZNVER4_M64: #define __MMX__ 1
// CHECK_ZNVER4_M64: #define __PCLMUL__ 1
// CHECK_ZNVER4_M64: #define __PKU__ 1
// CHECK_ZNVER4_M64: #define __POPCNT__ 1
// CHECK_ZNVER4_M64: #define __PRFCHW__ 1
// CHECK_ZNVER4_M64: #define __RDPID__ 1
// CHECK_ZNVER4_M64: #define __RDPRU__ 1
// CHECK_ZNVER4_M64: #define __RDRND__ 1
// CHECK_ZNVER4_M64: #define __RDSEED__ 1
// CHECK_ZNVER4_M64: #define __SHA__ 1
// CHECK_ZNVER4_M64: #define __SSE2_MATH__ 1
// CHECK_ZNVER4_M64: #define __SSE2__ 1
// CHECK_ZNVER4_M64: #define __SSE3__ 1
// CHECK_ZNVER4_M64: #define __SSE4A__ 1
// CHECK_ZNVER4_M64: #define __SSE4_1__ 1
// CHECK_ZNVER4_M64: #define __SSE4_2__ 1
// CHECK_ZNVER4_M64: #define __SSE_MATH__ 1
// CHECK_ZNVER4_M64: #define __SSE__ 1
// CHECK_ZNVER4_M64: #define __SSSE3__ 1
// CHECK_ZNVER4_M64-NOT: #define __TBM__ 1
// CHECK_ZNVER4_M64: #define __VAES__ 1
// CHECK_ZNVER4_M64: #define __VPCLMULQDQ__ 1
// CHECK_ZNVER4_M64: #define __WBNOINVD__ 1
// CHECK_ZNVER4_M64-NOT: #define __XOP__ 1
// CHECK_ZNVER4_M64: #define __XSAVEC__ 1
// CHECK_ZNVER4_M64: #define __XSAVEOPT__ 1
// CHECK_ZNVER4_M64: #define __XSAVES__ 1
// CHECK_ZNVER4_M64: #define __XSAVE__ 1
// CHECK_ZNVER4_M64: #define __amd64 1
// CHECK_ZNVER4_M64: #define __amd64__ 1
// CHECK_ZNVER4_M64: #define __tune_znver4__ 1
// CHECK_ZNVER4_M64: #define __x86_64 1
// CHECK_ZNVER4_M64: #define __x86_64__ 1
// CHECK_ZNVER4_M64: #define __znver4 1
// CHECK_ZNVER4_M64: #define __znver4__ 1

// End X86/GCC/Linux tests ------------------

// Begin PPC/GCC/Linux tests ----------------
Expand Down
8 changes: 8 additions & 0 deletions compiler-rt/lib/builtins/cpu_model.c
Expand Up @@ -627,6 +627,14 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = AMDFAM19H_ZNVER3;
break;
}
if ((Model >= 0x10 && Model <= 0x1f) ||
(Model >= 0x60 && Model <= 0x74) ||
(Model >= 0x78 && Model <= 0x7b) ||
(Model >= 0xA0 && Model <= 0xAf)) {
CPU = "znver4";
*Subtype = AMDFAM19H_ZNVER4;
break; // "znver4"
}
break;
default:
break; // Unknown AMD CPU.
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/X86TargetParser.h
Expand Up @@ -129,6 +129,7 @@ enum CPUKind {
CK_ZNVER1,
CK_ZNVER2,
CK_ZNVER3,
CK_ZNVER4,
CK_x86_64,
CK_x86_64_v2,
CK_x86_64_v3,
Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Support/Host.cpp
Expand Up @@ -1105,7 +1105,15 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = X86::AMDFAM19H_ZNVER3;
break;
}
break;
if ((Model >= 0x10 && Model <= 0x1f) ||
(Model >= 0x60 && Model <= 0x74) ||
(Model >= 0x78 && Model <= 0x7b) ||
(Model >= 0xA0 && Model <= 0xAf)) {
CPU = "znver4";
*Subtype = X86::AMDFAM19H_ZNVER4;
break; // "znver4"
}
break; // family 19h
default:
break; // Unknown AMD CPU.
}
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Support/X86TargetParser.cpp
Expand Up @@ -298,6 +298,12 @@ constexpr FeatureBitset FeaturesZNVER2 = FeaturesZNVER1 | FeatureCLWB |
static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
FeatureINVPCID | FeaturePKU |
FeatureVAES | FeatureVPCLMULQDQ;
// Zen 4 feature set: every bit of FeaturesZNVER3, plus the AVX-512 subsets
// listed below, GFNI and SHSTK.
static constexpr FeatureBitset FeaturesZNVER4 =
    FeaturesZNVER3 |
    // AVX-512 foundation, conflict detection, DQ/BW and vector-length forms.
    FeatureAVX512F | FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512BW |
    FeatureAVX512VL |
    // Further AVX-512 subsets.
    FeatureAVX512IFMA | FeatureAVX512VBMI | FeatureAVX512VBMI2 |
    FeatureAVX512VNNI | FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ |
    FeatureAVX512BF16 |
    // Non-AVX-512 additions.
    FeatureGFNI | FeatureSHSTK;

constexpr ProcInfo Processors[] = {
// Empty processor. Include X87 and CMPXCHG8 for backwards compatibility.
Expand Down Expand Up @@ -425,6 +431,7 @@ constexpr ProcInfo Processors[] = {
{ {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1 },
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 },
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3 },
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4 },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 },
{ {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 },
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/Target/X86/X86.td
Expand Up @@ -1295,6 +1295,23 @@ def ProcessorFeatures {
!listconcat(ZN2Tuning, ZN3AdditionalTuning);
list<SubtargetFeature> ZN3Features =
!listconcat(ZN2Features, ZN3AdditionalFeatures);
// Zen 4 tuning flags are taken unchanged from Zen 3.
list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
// ISA features Zen 4 adds on top of ZN3Features.
list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeatureVBMI,
FeatureVBMI2,
FeatureIFMA,
FeatureVNNI,
FeatureBITALG,
FeatureGFNI,
FeatureBF16,
FeatureSHSTK,
FeatureVPOPCNTDQ];
// Complete Zen 4 feature list: ZN3Features followed by the additions above.
list<SubtargetFeature> ZN4Features =
!listconcat(ZN3Features, ZN4AdditionalFeatures);
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1599,6 +1616,8 @@ def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
// znver4 is declared with Proc, which takes no scheduling-model argument,
// unlike the ProcModel entries for znver2/znver3 above that name one.
def : Proc<"znver4",ProcessorFeatures.ZN4Features,
ProcessorFeatures.ZN4Tuning>;

def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86PfmCounters.td
Expand Up @@ -289,3 +289,4 @@ def ZnVer3PfmCounters : ProcPfmCounters {
];
}
def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>;
// znver4 reuses the ZnVer3 counter definitions.
def : PfmCountersBinding<"znver4", ZnVer3PfmCounters>;
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/cpus-amd.ll
Expand Up @@ -28,6 +28,7 @@
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty

define void @foo() {
ret void
Expand Down

0 comments on commit 1f057e3

Please sign in to comment.