Skip to content

Commit

Permalink
[X86][1/2] Support PREFETCHI instructions
Browse files Browse the repository at this point in the history
For more details about these instructions, please refer to the latest ISE document: https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D136040
  • Loading branch information
phoebewang committed Oct 20, 2022
1 parent 859b614 commit 62ca791
Show file tree
Hide file tree
Showing 22 changed files with 203 additions and 2 deletions.
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Expand Up @@ -587,6 +587,7 @@ X86 Support in Clang
- Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk.
- Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC.
- Switch ``AVX512-BF16`` intrinsics types from ``short`` to ``__bf16``.
- Add support for ``PREFETCHI`` instructions.

DWARF Support in Clang
----------------------
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/BuiltinsX86_64.def
Expand Up @@ -133,6 +133,8 @@ TARGET_BUILTIN(__builtin_ia32_tdpbuud, "vIUcIUcIUc", "n", "amx-int8")
TARGET_BUILTIN(__builtin_ia32_tdpbf16ps, "vIUcIUcIUc", "n", "amx-bf16")
TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vUOi", "n", "ptwrite")

TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi")

#undef BUILTIN
#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Expand Up @@ -4647,6 +4647,8 @@ def mpconfig : Flag<["-"], "mpconfig">, Group<m_x86_Features_Group>;
def mno_pconfig : Flag<["-"], "mno-pconfig">, Group<m_x86_Features_Group>;
def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>;
def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>;
def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/X86.cpp
Expand Up @@ -290,6 +290,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasCLWB = true;
} else if (Feature == "+wbnoinvd") {
HasWBNOINVD = true;
} else if (Feature == "+prefetchi") {
HasPREFETCHI = true;
} else if (Feature == "+prefetchwt1") {
HasPREFETCHWT1 = true;
} else if (Feature == "+clzero") {
Expand Down Expand Up @@ -738,6 +740,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__SHSTK__");
if (HasSGX)
Builder.defineMacro("__SGX__");
if (HasPREFETCHI)
Builder.defineMacro("__PREFETCHI__");
if (HasPREFETCHWT1)
Builder.defineMacro("__PREFETCHWT1__");
if (HasCLZERO)
Expand Down Expand Up @@ -929,6 +933,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("pconfig", true)
.Case("pku", true)
.Case("popcnt", true)
.Case("prefetchi", true)
.Case("prefetchwt1", true)
.Case("prfchw", true)
.Case("ptwrite", true)
Expand Down Expand Up @@ -1025,6 +1030,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("pconfig", HasPCONFIG)
.Case("pku", HasPKU)
.Case("popcnt", HasPOPCNT)
.Case("prefetchi", HasPREFETCHI)
.Case("prefetchwt1", HasPREFETCHWT1)
.Case("prfchw", HasPRFCHW)
.Case("ptwrite", HasPTWRITE)
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/X86.h
Expand Up @@ -123,6 +123,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasCLFLUSHOPT = false;
bool HasCLWB = false;
bool HasMOVBE = false;
bool HasPREFETCHI = false;
bool HasPREFETCHWT1 = false;
bool HasRDPID = false;
bool HasRDPRU = false;
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -15479,6 +15479,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
static constexpr int Mask[] = {0, 5, 6, 7};
return Builder.CreateShuffleVector(Call, Ops[2], Mask);
}
case X86::BI__builtin_ia32_prefetchi: {
  // Lower the builtin to the generic llvm.prefetch intrinsic, overloaded on
  // the type of the address operand. The hint supplied to the builtin
  // (Ops[1]) is forwarded as the third argument; the second and fourth
  // arguments are always the i32 constant zero.
  llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
  llvm::Function *PrefetchFn =
      CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType());
  return Builder.CreateCall(PrefetchFn, {Ops[0], Zero, Ops[1], Zero});
}
}
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/Headers/CMakeLists.txt
Expand Up @@ -174,6 +174,7 @@ set(x86_files
pkuintrin.h
pmmintrin.h
popcntintrin.h
prfchiintrin.h
prfchwintrin.h
ptwriteintrin.h
rdpruintrin.h
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Headers/cpuid.h
Expand Up @@ -204,6 +204,9 @@
#define bit_AVX512BF16 0x00000020
#define bit_HRESET 0x00400000

/* Features in %edx for leaf 7 sub-leaf 1 */
#define bit_PREFETCHI 0x00004000

/* Features in %eax for leaf 13 sub-leaf 1 */
#define bit_XSAVEOPT 0x00000001
#define bit_XSAVEC 0x00000002
Expand Down
61 changes: 61 additions & 0 deletions clang/lib/Headers/prfchiintrin.h
@@ -0,0 +1,61 @@
/*===---- prfchiintrin.h - PREFETCHI intrinsic -----------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/

#ifndef __PRFCHIINTRIN_H
#define __PRFCHIINTRIN_H

#ifdef __x86_64__

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("prefetchi")))

/// Prefetches the instruction sequence containing the specified memory
/// address into all levels of the cache hierarchy.
///
/// The actual effect of this intrinsic depends on the processor
/// implementation.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PREFETCHIT0 instruction.
///
/// \param __P
///    A pointer specifying the memory address to be prefetched.
static __inline__ void __DEFAULT_FN_ATTRS
_m_prefetchit0(volatile const void *__P) {
  /* The builtin takes a plain 'const void *'; silence -Wcast-qual only around
     the cast that drops the 'volatile' qualifier. */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
  const void *__addr = (const void *)__P;
#pragma clang diagnostic pop
  __builtin_ia32_prefetchi(__addr, 3 /* _MM_HINT_T0 */);
}

/// Prefetches the instruction sequence containing the specified memory
/// address into all levels of the cache hierarchy except the first-level
/// cache.
///
/// The actual effect of this intrinsic depends on the processor
/// implementation.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PREFETCHIT1 instruction.
///
/// \param __P
///    A pointer specifying the memory address to be prefetched.
static __inline__ void __DEFAULT_FN_ATTRS
_m_prefetchit1(volatile const void *__P) {
  /* The builtin takes a plain 'const void *'; silence -Wcast-qual only around
     the cast that drops the 'volatile' qualifier. */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
  const void *__addr = (const void *)__P;
#pragma clang diagnostic pop
  __builtin_ia32_prefetchi(__addr, 2 /* _MM_HINT_T1 */);
}
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS

#endif /* __PRFCHIINTRIN_H */
5 changes: 5 additions & 0 deletions clang/lib/Headers/x86gprintrin.h
Expand Up @@ -25,6 +25,11 @@
#include <crc32intrin.h>
#endif

/* Pull in the PREFETCHI intrinsics. On MSVC-compatible and SCE targets the
 * header is included only when modules are in use or when the feature is
 * enabled; the compiler signals the latter by defining __PREFETCHI__. */
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
    defined(__PREFETCHI__)
#include <prfchiintrin.h>
#endif

#if defined(__i386__)
#define __SAVE_GPRBX "mov {%%ebx, %%eax |eax, ebx};"
#define __RESTORE_GPRBX "mov {%%eax, %%ebx |ebx, eax};"
Expand Down
16 changes: 16 additions & 0 deletions clang/test/CodeGen/X86/prefetchi-builtins.c
@@ -0,0 +1,16 @@
// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -target-feature +prefetchi -emit-llvm -o - %s | FileCheck %s


#include <x86intrin.h>

// Verifies that _m_prefetchit0 is lowered to llvm.prefetch with locality 3.
void test_m_prefetch_it0(void *p) {
  // Call as a plain statement: a void function must not return an expression.
  _m_prefetchit0(p);
  // CHECK-LABEL: define{{.*}} void @test_m_prefetch_it0
  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 0)
}

// Verifies that _m_prefetchit1 is lowered to llvm.prefetch with locality 2.
void test_m_prefetch_it1(void *p) {
  // Call as a plain statement: a void function must not return an expression.
  _m_prefetchit1(p);
  // CHECK-LABEL: define{{.*}} void @test_m_prefetch_it1
  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 2, i32 0)
}
5 changes: 5 additions & 0 deletions clang/test/Driver/x86-target-features.c
Expand Up @@ -91,6 +91,11 @@
// PREFETCHWT1: "-target-feature" "+prefetchwt1"
// NO-PREFETCHWT1: "-target-feature" "-prefetchwt1"

// RUN: %clang --target=i386 -march=i386 -mprefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PREFETCHI %s
// RUN: %clang --target=i386 -march=i386 -mno-prefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHI %s
// PREFETCHI: "-target-feature" "+prefetchi"
// NO-PREFETCHI: "-target-feature" "-prefetchi"

// RUN: %clang --target=i386 -march=i386 -mclzero %s -### 2>&1 | FileCheck -check-prefix=CLZERO %s
// RUN: %clang --target=i386 -march=i386 -mno-clzero %s -### 2>&1 | FileCheck -check-prefix=NO-CLZERO %s
// CLZERO: "-target-feature" "+clzero"
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/X86TargetParser.def
Expand Up @@ -174,6 +174,7 @@ X86_FEATURE (MOVDIRI, "movdiri")
X86_FEATURE (MWAITX, "mwaitx")
X86_FEATURE (PCONFIG, "pconfig")
X86_FEATURE (PKU, "pku")
X86_FEATURE (PREFETCHI, "prefetchi")
X86_FEATURE (PREFETCHWT1, "prefetchwt1")
X86_FEATURE (PRFCHW, "prfchw")
X86_FEATURE (PTWRITE, "ptwrite")
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/Host.cpp
Expand Up @@ -1808,6 +1808,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);

bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Support/X86TargetParser.cpp
Expand Up @@ -581,6 +581,7 @@ constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};

static constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86.td
Expand Up @@ -134,6 +134,9 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI",
"true",
"Prefetch instruction with T0 or T1 Hint">;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
Expand Up @@ -73,7 +73,8 @@ class X86DiscriminateMemOps : public MachineFunctionPass {

/// Returns true if \p Opcode is one of the X86 prefetch opcodes recognised by
/// this pass: the NTA/T0/T1/T2 data prefetches or the IT0/IT1 instruction
/// prefetches.
bool IsPrefetchOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::PREFETCHNTA:
  case X86::PREFETCHT0:
  case X86::PREFETCHT1:
  case X86::PREFETCHT2:
  case X86::PREFETCHIT0:
  case X86::PREFETCHIT1:
    return true;
  default:
    return false;
  }
}
} // end anonymous namespace

Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.td
Expand Up @@ -956,6 +956,7 @@ def HasSGX : Predicate<"Subtarget->hasSGX()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">;
def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
Expand Down Expand Up @@ -2998,6 +2999,16 @@ let Predicates = [HasUINTR, In64BitMode], SchedRW = [WriteSystem] in {
[(set EFLAGS, (X86testui))]>, XS;
}

//===----------------------------------------------------------------------===//
// PREFETCHIT0 and PREFETCHIT1 Instructions
//
// Operand order of the matched node: prefetch ADDR, RW, Locality, Data.
// Both patterns require RW = 0 and Data = 0; Locality 3 selects PREFETCHIT0
// and Locality 2 selects PREFETCHIT1.
//===----------------------------------------------------------------------===//
// Only available with the prefetchi feature and in 64-bit mode.
let Predicates = [HasPREFETCHI, In64BitMode], SchedRW = [WriteLoad] in {
// Opcode 0x18 in the two-byte (TB) map, form MRM7m.
def PREFETCHIT0 : I<0x18, MRM7m, (outs), (ins i8mem:$src),
"prefetchit0\t$src", [(prefetch addr:$src, (i32 0), (i32 3), (i32 0))]>, TB;
// Opcode 0x18 in the two-byte (TB) map, form MRM6m.
def PREFETCHIT1 : I<0x18, MRM6m, (outs), (ins i8mem:$src),
"prefetchit1\t$src", [(prefetch addr:$src, (i32 0), (i32 2), (i32 0))]>, TB;
}

//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86Subtarget.h
Expand Up @@ -221,7 +221,8 @@ class X86Subtarget final : public X86GenSubtargetInfo {
// We implicitly enable these when we have a write prefix supporting cache
// level OR if we have prfchw, but don't already have a read prefetch from
// 3dnow. They are also enabled when prefetchi is available.
return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1();
return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
hasPREFETCHI();
}
bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
// These are generic getters that OR together all of the thunk types
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/X86/prefetchi.ll
@@ -0,0 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+prefetchi | FileCheck %s

; Exercises selection of the instruction-prefetch forms of llvm.prefetch
; (second and fourth arguments both 0): locality 2 is expected to select
; prefetchit1 and locality 3 prefetchit0, for a register-based address as well
; as for RIP-relative references to this function and to an external one.
define dso_local void @t(ptr %ptr) nounwind {
; CHECK-LABEL: t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: prefetchit1 (%rdi)
; CHECK-NEXT: prefetchit0 (%rdi)
; CHECK-NEXT: prefetchit1 t(%rip)
; CHECK-NEXT: prefetchit0 ext(%rip)
; CHECK-NEXT: retq
entry:
tail call void @llvm.prefetch(ptr %ptr, i32 0, i32 2, i32 0)
tail call void @llvm.prefetch(ptr %ptr, i32 0, i32 3, i32 0)
tail call void @llvm.prefetch(ptr @t, i32 0, i32 2, i32 0)
tail call void @llvm.prefetch(ptr @ext, i32 0, i32 3, i32 0)
ret void
}

declare dso_local void @ext() nounwind
declare void @llvm.prefetch(ptr, i32, i32, i32) nounwind
6 changes: 6 additions & 0 deletions llvm/test/MC/Disassembler/X86/x86-64.txt
Expand Up @@ -761,3 +761,9 @@

# CHECK: rdpru
0x0f,0x01,0xfd

# CHECK: prefetchit0 (%rip)
0x0f,0x18,0x3d,0x00,0x00,0x00,0x00

# CHECK: prefetchit1 (%rip)
0x0f,0x18,0x35,0x00,0x00,0x00,0x00
47 changes: 47 additions & 0 deletions llvm/test/MC/X86/PREFETCH-64.s
Expand Up @@ -168,3 +168,50 @@ prefetchwt1 64(%rdx,%rax)
// CHECK: encoding: [0x0f,0x0d,0x12]
prefetchwt1 (%rdx)

// CHECK: prefetchit0 485498096
// CHECK: encoding: [0x0f,0x18,0x3c,0x25,0xf0,0x1c,0xf0,0x1c]
prefetchit0 485498096

// CHECK: prefetchit0 64(%rdx)
// CHECK: encoding: [0x0f,0x18,0x7a,0x40]
prefetchit0 64(%rdx)

// CHECK: prefetchit0 64(%rdx,%rax,4)
// CHECK: encoding: [0x0f,0x18,0x7c,0x82,0x40]
prefetchit0 64(%rdx,%rax,4)

// CHECK: prefetchit0 -64(%rdx,%rax,4)
// CHECK: encoding: [0x0f,0x18,0x7c,0x82,0xc0]
prefetchit0 -64(%rdx,%rax,4)

// CHECK: prefetchit0 64(%rdx,%rax)
// CHECK: encoding: [0x0f,0x18,0x7c,0x02,0x40]
prefetchit0 64(%rdx,%rax)

// CHECK: prefetchit0 (%rdx)
// CHECK: encoding: [0x0f,0x18,0x3a]
prefetchit0 (%rdx)

// CHECK: prefetchit1 485498096
// CHECK: encoding: [0x0f,0x18,0x34,0x25,0xf0,0x1c,0xf0,0x1c]
prefetchit1 485498096

// CHECK: prefetchit1 64(%rdx)
// CHECK: encoding: [0x0f,0x18,0x72,0x40]
prefetchit1 64(%rdx)

// CHECK: prefetchit1 64(%rdx,%rax,4)
// CHECK: encoding: [0x0f,0x18,0x74,0x82,0x40]
prefetchit1 64(%rdx,%rax,4)

// CHECK: prefetchit1 -64(%rdx,%rax,4)
// CHECK: encoding: [0x0f,0x18,0x74,0x82,0xc0]
prefetchit1 -64(%rdx,%rax,4)

// CHECK: prefetchit1 64(%rdx,%rax)
// CHECK: encoding: [0x0f,0x18,0x74,0x02,0x40]
prefetchit1 64(%rdx,%rax)

// CHECK: prefetchit1 (%rdx)
// CHECK: encoding: [0x0f,0x18,0x32]
prefetchit1 (%rdx)

0 comments on commit 62ca791

Please sign in to comment.