Skip to content

Commit

Permalink
Implement __cpuid and __cpuidex as Clang builtins
Browse files Browse the repository at this point in the history
https://reviews.llvm.org/D23944 implemented the #pragma intrinsic from
MSVC. This causes the statement #pragma intrinsic(__cpuid) to fail [0]
on Clang because __cpuid is currently implemented in intrin.h instead
of as a Clang builtin. Reimplementing __cpuid (as well as its related
function, __cpuidex) as builtins should resolve this.

[0]: https://crbug.com/1279344

Differential revision: https://reviews.llvm.org/D121653
  • Loading branch information
alanzhao1 authored and zmodem committed Mar 18, 2022
1 parent a014cb8 commit 8cd8bd4
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 31 deletions.
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -2071,6 +2071,9 @@ TARGET_HEADER_BUILTIN(_ReadWriteBarrier, "v", "nh", "intrin.h", ALL_MS_LANGUAGES
TARGET_HEADER_BUILTIN(_ReadBarrier, "v", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_WriteBarrier, "v", "nh", "intrin.h", ALL_MS_LANGUAGES, "")

// MSVC-compatible CPUID intrinsics, associated with intrin.h. Both return void
// and take an int pointer followed by one (__cpuid) or two (__cpuidex) int
// arguments.
TARGET_HEADER_BUILTIN(__cpuid, "vi*i", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__cpuidex, "vi*ii", "nh", "intrin.h", ALL_MS_LANGUAGES, "")

TARGET_HEADER_BUILTIN(__emul, "LLiii", "nch", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__emulu, "ULLiUiUi", "nch", "intrin.h", ALL_MS_LANGUAGES, "")

Expand Down
40 changes: 40 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14922,6 +14922,46 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86Select(*this, Ops[2], Res, Ops[1]);
}

// __cpuid(info, func) and __cpuidex(info, func, subfunc): emit the CPUID
// instruction as an inline-asm call and store its four 32-bit results to
// info[0..3].
case X86::BI__cpuid:
case X86::BI__cpuidex: {
// Argument 1 is the function id. Only __cpuidex has a sub-function id
// (argument 2); for __cpuid a constant 0 is used instead.
Value *FuncId = EmitScalarExpr(E->getArg(1));
Value *SubFuncId = BuiltinID == X86::BI__cpuidex
? EmitScalarExpr(E->getArg(2))
: llvm::ConstantInt::get(Int32Ty, 0);

// The asm takes two i32 inputs and yields a struct of four i32 outputs.
llvm::StructType *CpuidRetTy =
llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
llvm::FunctionType *FTy =
llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);

StringRef Asm, Constraints;
if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
// 32-bit x86: outputs are pinned to ax/bx/cx/dx and the two inputs to
// ax/cx.
Asm = "cpuid";
Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
} else {
// x86-64 uses %rbx as the base register, so preserve it.
// The second output lives in an arbitrary register ("=r") that is swapped
// with %rbx around the cpuid; inputs are tied to outputs 0 and 2.
Asm = "xchgq %rbx, ${1:q}\n"
"cpuid\n"
"xchgq %rbx, ${1:q}";
Constraints = "={ax},=r,={cx},={dx},0,2";
}

llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
/*hasSideEffects=*/false);
Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
// The destination pointer (argument 0) is emitted after the asm call.
Value *BasePtr = EmitScalarExpr(E->getArg(0));
Value *Store = nullptr;
// Write result i of the asm to the i-th i32 slot starting at BasePtr.
for (unsigned i = 0; i < 4; i++) {
Value *Extracted = Builder.CreateExtractValue(IACall, i);
Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
}

// Return the last store instruction to signal that we have emitted the
// intrinsic.
return Store;
}

case X86::BI__emul:
case X86::BI__emulu: {
llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
Expand Down
21 changes: 0 additions & 21 deletions clang/lib/Headers/intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,27 +534,6 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst,
|* Misc
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
/* __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx): executes the
 * cpuid instruction with __leaf and __count as inputs (tied to the first and
 * third outputs) and writes the four results into the given lvalues. */
#if defined(__i386__)
/* 32-bit variant: each output is bound directly to its register
 * ("a", "b", "c", "d" constraints). */
#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \
__asm("cpuid" \
: "=a"(__eax), "=b"(__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__leaf), "2"(__count))
#else
/* x86-64 uses %rbx as the base register, so preserve it. */
/* The second output uses a general register ("=r") that is exchanged with
 * %rbx before and after the cpuid instruction. */
#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \
__asm("xchg{q} {%%rbx, %q1|%q1, rbx}\n" \
"cpuid\n" \
"xchg{q} {%%rbx, %q1|%q1, rbx}" \
: "=a"(__eax), "=r"(__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__leaf), "2"(__count))
#endif
/* Runs cpuid for leaf __level with a count of 0 and stores the eax, ebx, ecx
 * and edx results in __info[0], __info[1], __info[2] and __info[3]. */
static __inline__ void __DEFAULT_FN_ATTRS __cpuid(int __info[4], int __level) {
__cpuid_count(__level, 0, __info[0], __info[1], __info[2], __info[3]);
}
/* Runs cpuid for leaf __level with __ecx as the count input and stores the
 * eax, ebx, ecx and edx results in __info[0], __info[1], __info[2] and
 * __info[3]. */
static __inline__ void __DEFAULT_FN_ATTRS __cpuidex(int __info[4], int __level,
int __ecx) {
__cpuid_count(__level, __ecx, __info[0], __info[1], __info[2], __info[3]);
}
/* Executes the hlt instruction via a volatile inline-asm statement. */
static __inline__ void __DEFAULT_FN_ATTRS __halt(void) {
__asm__ volatile("hlt");
}
Expand Down
77 changes: 67 additions & 10 deletions clang/test/CodeGen/ms-intrinsics-cpuid.c
Original file line number Diff line number Diff line change
@@ -1,23 +1,80 @@
// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
// RUN: -triple i686-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=X86
// RUN: -Werror -triple i686-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=X86
// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
// RUN: -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=X64
// RUN: -Werror -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=X64

// intrin.h needs size_t, but -ffreestanding prevents us from getting it from
// stddef.h. Work around it with this typedef.
typedef __SIZE_TYPE__ size_t;

#include <intrin.h>

void test__cpuid(int *info, int level) {
__cpuid(info, level);
#pragma intrinsic(__cpuid)

// Calls __cpuid once so the IR emitted for it can be matched by the check
// lines that follow this function.
void test__cpuid(int cpuInfo[4], int function_id) {
__cpuid(cpuInfo, function_id);
}
// X86-LABEL: define {{.*}} @test__cpuid(i32* noundef %{{.*}}, i32 noundef %{{.*}})
// X86: call { i32, i32, i32, i32 } asm "cpuid",
// X86-SAME: "={ax},={bx},={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"
// X86-SAME: (i32 %{{.*}}, i32 0)
// X86-DAG: [[ASMRESULTS:%[0-9]+]] = call { i32, i32, i32, i32 } asm "cpuid", "={ax},={bx},={cx},={dx},{ax},{cx}"
// X86-DAG: [[ADDRPTR0:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 0
// X86-DAG: [[ADDRPTR1:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 1
// X86-DAG: [[ADDRPTR2:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 2
// X86-DAG: [[ADDRPTR3:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 3
// X86-DAG: [[RESULT0:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 0
// X86-DAG: [[RESULT1:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 1
// X86-DAG: [[RESULT2:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 2
// X86-DAG: [[RESULT3:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 3
// X86-DAG: store i32 [[RESULT0]], i32* [[ADDRPTR0]], align 4
// X86-DAG: store i32 [[RESULT1]], i32* [[ADDRPTR1]], align 4
// X86-DAG: store i32 [[RESULT2]], i32* [[ADDRPTR2]], align 4
// X86-DAG: store i32 [[RESULT3]], i32* [[ADDRPTR3]], align 4

// X64-LABEL: define {{.*}} @test__cpuid(i32* noundef %{{.*}}, i32 noundef %{{.*}})
// X64: call { i32, i32, i32, i32 } asm "xchg$(q$) $(%rbx{{.*}}$){{.*}}cpuid{{.*}}xchg$(q$) $(%rbx{{.*}}$)",
// X64-SAME: "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"
// X64-SAME: (i32 %{{.*}}, i32 0)
// X64-DAG: [[ASMRESULTS:%[0-9]+]] = call { i32, i32, i32, i32 } asm "xchgq %rbx, ${1:q}\0Acpuid\0Axchgq %rbx, ${1:q}", "={ax},=r,={cx},={dx},0,2"
// X64-DAG: [[ADDRPTR0:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 0
// X64-DAG: [[ADDRPTR1:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 1
// X64-DAG: [[ADDRPTR2:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 2
// X64-DAG: [[ADDRPTR3:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 3
// X64-DAG: [[RESULT0:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 0
// X64-DAG: [[RESULT1:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 1
// X64-DAG: [[RESULT2:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 2
// X64-DAG: [[RESULT3:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 3
// X64-DAG: store i32 [[RESULT0]], i32* [[ADDRPTR0]], align 4
// X64-DAG: store i32 [[RESULT1]], i32* [[ADDRPTR1]], align 4
// X64-DAG: store i32 [[RESULT2]], i32* [[ADDRPTR2]], align 4
// X64-DAG: store i32 [[RESULT3]], i32* [[ADDRPTR3]], align 4

#pragma intrinsic(__cpuidex)

// Calls __cpuidex once, passing an explicit sub-function id, so the IR emitted
// for it can be matched by the check lines that follow this function.
void test__cpuidex(int cpuInfo[4], int function_id, int subfunction_id) {
__cpuidex(cpuInfo, function_id, subfunction_id);
}
// X86-LABEL: define {{.*}} @test__cpuidex(i32* noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}})
// X86-DAG: [[ASMRESULTS:%[0-9]+]] = call { i32, i32, i32, i32 } asm "cpuid", "={ax},={bx},={cx},={dx},{ax},{cx}"
// X86-DAG: [[ADDRPTR0:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 0
// X86-DAG: [[ADDRPTR1:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 1
// X86-DAG: [[ADDRPTR2:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 2
// X86-DAG: [[ADDRPTR3:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 3
// X86-DAG: [[RESULT0:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 0
// X86-DAG: [[RESULT1:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 1
// X86-DAG: [[RESULT2:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 2
// X86-DAG: [[RESULT3:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 3
// X86-DAG: store i32 [[RESULT0]], i32* [[ADDRPTR0]], align 4
// X86-DAG: store i32 [[RESULT1]], i32* [[ADDRPTR1]], align 4
// X86-DAG: store i32 [[RESULT2]], i32* [[ADDRPTR2]], align 4
// X86-DAG: store i32 [[RESULT3]], i32* [[ADDRPTR3]], align 4

// X64-LABEL: define {{.*}} @test__cpuidex(i32* noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}})
// X64-DAG: [[ASMRESULTS:%[0-9]+]] = call { i32, i32, i32, i32 } asm "xchgq %rbx, ${1:q}\0Acpuid\0Axchgq %rbx, ${1:q}", "={ax},=r,={cx},={dx},0,2"
// X64-DAG: [[ADDRPTR0:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 0
// X64-DAG: [[ADDRPTR1:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 1
// X64-DAG: [[ADDRPTR2:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 2
// X64-DAG: [[ADDRPTR3:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 3
// X64-DAG: [[RESULT0:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 0
// X64-DAG: [[RESULT1:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 1
// X64-DAG: [[RESULT2:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 2
// X64-DAG: [[RESULT3:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 3
// X64-DAG: store i32 [[RESULT0]], i32* [[ADDRPTR0]], align 4
// X64-DAG: store i32 [[RESULT1]], i32* [[ADDRPTR1]], align 4
// X64-DAG: store i32 [[RESULT2]], i32* [[ADDRPTR2]], align 4
// X64-DAG: store i32 [[RESULT3]], i32* [[ADDRPTR3]], align 4

0 comments on commit 8cd8bd4

Please sign in to comment.