From cd79485a96da328304a2978e4d2d094e06206219 Mon Sep 17 00:00:00 2001 From: Daniel Lenski Date: Thu, 20 Nov 2025 21:30:56 -0800 Subject: [PATCH 1/4] cpuid-dump should print hypervisor leaves (0x40000000) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per https://en.wikipedia.org/wiki/CPUID#EAX=4000'0000h-4FFFF'FFFh:_Reserved_for_Hypervisors: > CPUID leaves 40000000h to 4FFFFFFFh are not implemented in hardware, and > are reserved for use by hypervisors to provide hypervisor-specific > identification and feature information through this interception > mechanism. > > For leaf 40000000h, the hypervisor is expected to return the index of the > highest supported hypervisor CPUID leaf in EAX, and a 12-character > hypervisor ID string in EBX,ECX,EDX (in that order). For leaf 40000001h, > the hypervisor may return an interface identification signature in EAX - > e.g. hypervisors that wish to advertise that they are Hyper-V compatible > may return 0x31237648—"Hv#1" in EAX. 
For example, running with user-mode QEMU: ``` $ qemu-x86_64 cpuid-dump … CPUID 40000000: 40000001-54474354-43544743-47435447 [TCGTCGTCGTCG] CPUID 40000001: 00000000-00000000-00000000-00000000 … ``` Running under a recent version of WSL2 (Microsoft Hyper-V): ``` $ cpuid-dump … CPUID 40000000: 4000000B-7263694D-666F736F-76482074 [Microsoft Hv] CPUID 40000001: 31237648-00000000-00000000-00000000 [Hv#1] … CPUID 4000000B: 00000000-00000000-00000000-00000000 ``` --- tools/cpuid-dump.c | 76 ++++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/tools/cpuid-dump.c b/tools/cpuid-dump.c index 1a65510f..febdc682 100644 --- a/tools/cpuid-dump.c +++ b/tools/cpuid-dump.c @@ -24,39 +24,45 @@ static void print_cpuidex(struct cpuid_regs regs, uint32_t eax, uint32_t ecx) { ecx); } -static void print_cpuid_vendor(struct cpuid_regs regs, uint32_t eax) { - if (regs.ebx | regs.ecx | regs.edx) { - char vendor_id[12]; - memcpy(&vendor_id[0], &regs.ebx, sizeof(regs.ebx)); - memcpy(&vendor_id[4], &regs.edx, sizeof(regs.edx)); - memcpy(&vendor_id[8], &regs.ecx, sizeof(regs.ecx)); - printf("CPUID %08" PRIX32 ": %08" PRIX32 "-%08" PRIX32 "-%08" PRIX32 "-%08" PRIX32 " [%.12s]\n", +enum cpuid_string_format { REG_ABCD = 1, REG_BDC = 2, REG_BCD = 3, REG_A = 4 }; + +static void print_cpuid_string(struct cpuid_regs regs, uint32_t eax, enum cpuid_string_format fmt) { + char buf[16]; + memset(buf, 0, sizeof(buf)); + switch (fmt) { + case REG_ABCD: + memcpy(&buf[0], &regs.eax, sizeof(regs.eax)); + memcpy(&buf[4], &regs.ebx, sizeof(regs.ebx)); + memcpy(&buf[8], &regs.ecx, sizeof(regs.ecx)); + memcpy(&buf[12], &regs.edx, sizeof(regs.edx)); + break; + case REG_BCD: + memcpy(&buf[0], &regs.ebx, sizeof(regs.ebx)); + memcpy(&buf[4], &regs.ecx, sizeof(regs.ecx)); + memcpy(&buf[8], &regs.edx, sizeof(regs.edx)); + break; + case REG_BDC: + memcpy(&buf[0], &regs.ebx, sizeof(regs.ebx)); + memcpy(&buf[4], &regs.edx, sizeof(regs.ecx)); + memcpy(&buf[8], &regs.ecx, sizeof(regs.edx)); + break; + case 
REG_A: + memcpy(&buf[0], &regs.eax, sizeof(regs.eax)); + break; + } + if (buf[0]) { + printf("CPUID %08" PRIX32 ": %08" PRIX32 "-%08" PRIX32 "-%08" PRIX32 "-%08" PRIX32 " [%.16s]\n", eax, regs.eax, regs.ebx, regs.ecx, regs.edx, - vendor_id); + buf); } else { print_cpuid(regs, eax); } } -static void print_cpuid_brand_string(struct cpuid_regs regs, uint32_t eax) { - char brand_string[16]; - memcpy(&brand_string[0], &regs.eax, sizeof(regs.eax)); - memcpy(&brand_string[4], &regs.ebx, sizeof(regs.ebx)); - memcpy(&brand_string[8], &regs.ecx, sizeof(regs.ecx)); - memcpy(&brand_string[12], &regs.edx, sizeof(regs.edx)); - printf("CPUID %08" PRIX32 ": %08" PRIX32 "-%08" PRIX32 "-%08" PRIX32 "-%08" PRIX32 " [%.16s]\n", - eax, - regs.eax, - regs.ebx, - regs.ecx, - regs.edx, - brand_string); -} - int main(int argc, char** argv) { const uint32_t max_base_index = cpuid(0).eax; uint32_t max_structured_index = 0, max_trace_index = 0, max_socid_index = 0; @@ -64,7 +70,7 @@ int main(int argc, char** argv) { for (uint32_t eax = 0; eax <= max_base_index; eax++) { switch (eax) { case UINT32_C(0x00000000): - print_cpuid_vendor(cpuid(eax), eax); + print_cpuid_string(cpuid(eax), eax, REG_BDC); break; case UINT32_C(0x00000004): for (uint32_t ecx = 0;; ecx++) { @@ -138,16 +144,34 @@ int main(int argc, char** argv) { } } + /** + * CPUID[1].ECX bit 31 is supposed to indicate whether or not + * a hypervisor is running, but not all hypervisors set it. 
+ */ + const uint32_t max_hypervisor_index = cpuid(UINT32_C(0x40000000)).eax; + for (uint32_t eax = UINT32_C(0x40000000); eax <= max_hypervisor_index; eax++) { + switch (eax) { + case UINT32_C(0x40000000): + print_cpuid_string(cpuid(eax), eax, REG_BCD); + break; + case UINT32_C(0x40000001): + print_cpuid_string(cpuid(eax), eax, REG_A); + break; + default: + print_cpuid(cpuidex(eax, 0), eax); + } + } + const uint32_t max_extended_index = cpuid(UINT32_C(0x80000000)).eax; for (uint32_t eax = UINT32_C(0x80000000); eax <= max_extended_index; eax++) { switch (eax) { case UINT32_C(0x80000000): - print_cpuid_vendor(cpuid(eax), eax); + print_cpuid_string(cpuid(eax), eax, REG_BDC); break; case UINT32_C(0x80000002): case UINT32_C(0x80000003): case UINT32_C(0x80000004): - print_cpuid_brand_string(cpuid(eax), eax); + print_cpuid_string(cpuid(eax), eax, REG_ABCD); break; default: print_cpuid(cpuidex(eax, 0), eax); From e2ed60b69e1c0b5f304544e75bd72dc9ffdc7227 Mon Sep 17 00:00:00 2001 From: Daniel Lenski Date: Fri, 21 Nov 2025 15:02:38 -0800 Subject: [PATCH 2/4] cpuid-dump should handle leaf 0x1f Per Intel's April 2023 documentation (https://cdrdv2-public.intel.com/775917/intel-64-architecture-processor-topology-enumeration.pdf): > The extended topology enumeration leaf of CPUID (leaf 0BH) was introduced > in 2009 along with the x2APIC IDs. 
This leaf has been superseded by the > v2 extended topology enumeration leaf (CPUID leaf 1FH), which is the > preferred interface for system topology enumeration for current Intel 64 > processors --- tools/cpuid-dump.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/cpuid-dump.c b/tools/cpuid-dump.c index febdc682..0f3a4659 100644 --- a/tools/cpuid-dump.c +++ b/tools/cpuid-dump.c @@ -92,6 +92,7 @@ int main(int argc, char** argv) { } break; case UINT32_C(0x0000000B): + case UINT32_C(0x0000001F): // Extended/V2 for (uint32_t ecx = 0;; ecx++) { const struct cpuid_regs regs = cpuidex(eax, ecx); if ((regs.ecx & UINT32_C(0x0000FF00)) == 0) { From 7a23453a2e6f456978efe9c81477fa63956a6a24 Mon Sep 17 00:00:00 2001 From: Daniel Lenski Date: Thu, 20 Nov 2025 14:39:48 -0800 Subject: [PATCH 3/4] cpuid-dump should iterate through all CPUs As Intel puts it: "CPUID, by design, returns different values depending on the core it is executed on" (see https://www.intel.com/content/www/us/en/developer/articles/guide/12th-gen-intel-core-processor-gamedev-guide.html#inpage-nav-1-5-2:~:text=CPUID%2C%20by%20design%2C%20returns%20different%20values%20depending%20on%20the%20core%20it%20is%20executed%20on) In particular, leaves 1, 4, 0x0b, 0x1a, and 0x1f are known to vary by core. Leaf 0x1a differentiates core types on hybrid CPUs. In order to aid in exploration of CPUID contents, `cpuid-dump` should dump CPUID results from *all* CPUs, rather than just one. This is currently implemented for Linux only, using the `sched_setaffinity(2)` system call. 
--- tools/cpuid-dump.c | 279 ++++++++++++++++++++++++++++++--------------- 1 file changed, 184 insertions(+), 95 deletions(-) diff --git a/tools/cpuid-dump.c b/tools/cpuid-dump.c index 0f3a4659..4c87bc68 100644 --- a/tools/cpuid-dump.c +++ b/tools/cpuid-dump.c @@ -1,3 +1,9 @@ +#ifdef __linux__ +#define _GNU_SOURCE +#include +#include +#endif + #include #include #include @@ -63,119 +69,202 @@ static void print_cpuid_string(struct cpuid_regs regs, uint32_t eax, enum cpuid_ } } +static void force_one_cpu(int cpu, int n_cpus) { + if (n_cpus > 1) { +#ifdef __linux__ + cpu_set_t mask_one; + CPU_ZERO(&mask_one); + CPU_SET(cpu, &mask_one); + if (sched_setaffinity(0, sizeof mask_one, &mask_one) != 0) { + char buf[80]; + snprintf(buf, sizeof(buf), "sched_setaffinity to CPU %d failed", cpu); + perror(buf); + exit(1); + } +#endif + } +} + +static void print_cpu_index(int cpu, int n_cpus) { +#ifdef __linux__ + if (n_cpus > 1) + printf("cpu%.*d: ", (n_cpus < 10 ? 1 : (n_cpus < 100 ? 2 : 3)), cpu); +#endif +} + int main(int argc, char** argv) { const uint32_t max_base_index = cpuid(0).eax; - uint32_t max_structured_index = 0, max_trace_index = 0, max_socid_index = 0; - bool has_sgx = false; - for (uint32_t eax = 0; eax <= max_base_index; eax++) { - switch (eax) { - case UINT32_C(0x00000000): - print_cpuid_string(cpuid(eax), eax, REG_BDC); - break; - case UINT32_C(0x00000004): - for (uint32_t ecx = 0;; ecx++) { - const struct cpuid_regs regs = cpuidex(eax, ecx); - if ((regs.eax & UINT32_C(0x1F)) == 0) { - break; - } - print_cpuidex(regs, eax, ecx); - } - break; - case UINT32_C(0x00000007): - for (uint32_t ecx = 0; ecx <= max_structured_index; ecx++) { - const struct cpuid_regs regs = cpuidex(eax, ecx); - if (ecx == 0) { - max_structured_index = regs.eax; - has_sgx = !!(regs.ebx & UINT32_C(0x00000004)); + uint32_t max_structured_index = 0, max_trace_index = 0, max_socid_index = 0, n_log_proc = 1; + bool has_sgx = false, is_hybrid = false; + +#ifdef __linux__ + // TODO: handle case 
of >CPU_SETSIZE logical CPUs, or non-contiguity + cpu_set_t mask_default; + if (sched_getaffinity(0, sizeof mask_default, &mask_default) != 0) { + perror("sched_getaffinity failed"); + exit(1); + } + n_log_proc = CPU_COUNT(&mask_default); +#endif + if (max_base_index >= 1) { + // TODO: handle case of >256 logical CPUs, or multiple packages + const struct cpuid_regs regs = cpuid(1); + uint32_t n_apic_id; + if (regs.edx & UINT32_C(0x10000000)) { // Number of logical CPUs field is valid + n_apic_id = ((regs.ebx & UINT32_C(0x00ff0000)) >> 16); + +#ifdef __linux__ + if (n_apic_id != n_log_proc) + fprintf(stderr, + "WARNING: %d logical CPUs per CPUID.01h.EBX[23:16] != %d per sched_getaffinity()\n", + n_apic_id, + n_log_proc); +#else + if (n_apic_id > 1) + fprintf(stderr, + "WARNING: %d logical CPUs per CPUID.01h.EBX, results may vary by logical CPU\n", + n_apic_id); +#endif + } + } + + for (uint32_t lp = 0; lp < n_log_proc; lp++) { + force_one_cpu(lp, n_log_proc); + + for (uint32_t eax = 0; eax <= max_base_index; eax++) { + switch (eax) { + case UINT32_C(0x00000000): + print_cpu_index(lp, n_log_proc); + print_cpuid_string(cpuid(eax), eax, REG_BDC); + break; + case UINT32_C(0x00000004): + for (uint32_t ecx = 0;; ecx++) { + const struct cpuid_regs regs = cpuidex(eax, ecx); + if ((regs.eax & UINT32_C(0x1F)) == 0) { + break; + } + print_cpu_index(lp, n_log_proc); + print_cpuidex(regs, eax, ecx); } - print_cpuidex(regs, eax, ecx); - } - break; - case UINT32_C(0x0000000B): - case UINT32_C(0x0000001F): // Extended/V2 - for (uint32_t ecx = 0;; ecx++) { - const struct cpuid_regs regs = cpuidex(eax, ecx); - if ((regs.ecx & UINT32_C(0x0000FF00)) == 0) { - break; + break; + case UINT32_C(0x00000007): + for (uint32_t ecx = 0; ecx <= max_structured_index; ecx++) { + const struct cpuid_regs regs = cpuidex(eax, ecx); + if (ecx == 0) { + max_structured_index = regs.eax; + has_sgx = !!(regs.ebx & UINT32_C(0x00000004)); + } + print_cpu_index(lp, n_log_proc); + print_cpuidex(regs, eax, 
ecx); } - print_cpuidex(regs, eax, ecx); - } - break; - case UINT32_C(0x00000012): - if (has_sgx) { + break; + case UINT32_C(0x0000000B): + case UINT32_C(0x0000001F): // Extended/V2 for (uint32_t ecx = 0;; ecx++) { const struct cpuid_regs regs = cpuidex(eax, ecx); - if (ecx >= 2 && (regs.eax & UINT32_C(0x0000000F)) == 0) { + if ((regs.ecx & UINT32_C(0x0000FF00)) == 0) { break; } + print_cpu_index(lp, n_log_proc); print_cpuidex(regs, eax, ecx); } - } - break; - case UINT32_C(0x00000014): - for (uint32_t ecx = 0; ecx <= max_trace_index; ecx++) { - const struct cpuid_regs regs = cpuidex(eax, ecx); - if (ecx == 0) { - max_trace_index = regs.eax; + break; + case UINT32_C(0x00000012): + if (has_sgx) { + for (uint32_t ecx = 0;; ecx++) { + const struct cpuid_regs regs = cpuidex(eax, ecx); + if (ecx >= 2 && (regs.eax & UINT32_C(0x0000000F)) == 0) { + break; + } + print_cpu_index(lp, n_log_proc); + print_cpuidex(regs, eax, ecx); + } + } + break; + case UINT32_C(0x00000014): + for (uint32_t ecx = 0; ecx <= max_trace_index; ecx++) { + const struct cpuid_regs regs = cpuidex(eax, ecx); + if (ecx == 0) { + max_trace_index = regs.eax; + } + print_cpu_index(lp, n_log_proc); + print_cpuidex(regs, eax, ecx); } - print_cpuidex(regs, eax, ecx); - } - break; - case UINT32_C(0x00000017): - for (uint32_t ecx = 0; ecx <= max_socid_index; ecx++) { - const struct cpuid_regs regs = cpuidex(eax, ecx); - if (ecx == 0) { - max_socid_index = regs.eax; + break; + case UINT32_C(0x00000017): + for (uint32_t ecx = 0; ecx <= max_socid_index; ecx++) { + const struct cpuid_regs regs = cpuidex(eax, ecx); + if (ecx == 0) { + max_socid_index = regs.eax; + } + print_cpu_index(lp, n_log_proc); + print_cpuidex(regs, eax, ecx); } - print_cpuidex(regs, eax, ecx); - } - break; - case UINT32_C(0x00000024): - for (uint32_t ecx = 0; ecx <= max_socid_index; ecx++) { - const struct cpuid_regs regs = cpuidex(eax, ecx); - if (ecx == 0) { - max_socid_index = regs.eax; + break; + case UINT32_C(0x00000024): + for 
(uint32_t ecx = 0; ecx <= max_socid_index; ecx++) { + const struct cpuid_regs regs = cpuidex(eax, ecx); + if (ecx == 0) { + max_socid_index = regs.eax; + } + print_cpu_index(lp, n_log_proc); + print_cpuidex(regs, eax, ecx); } - print_cpuidex(regs, eax, ecx); - } - break; - default: - print_cpuid(cpuidex(eax, 0), eax); - break; + break; + default: + print_cpu_index(lp, n_log_proc); + print_cpuid(cpuidex(eax, 0), eax); + break; + } + } + + /** + * CPUID[1].ECX bit 31 is supposed to indicate whether or not + * a hypervisor is running, but not all hypervisors set it. + */ + const uint32_t max_hypervisor_index = cpuid(UINT32_C(0x40000000)).eax; + for (uint32_t eax = UINT32_C(0x40000000); eax <= max_hypervisor_index; eax++) { + switch (eax) { + case UINT32_C(0x40000000): + print_cpu_index(lp, n_log_proc); + print_cpuid_string(cpuid(eax), eax, REG_BCD); + break; + case UINT32_C(0x40000001): + print_cpu_index(lp, n_log_proc); + print_cpuid_string(cpuid(eax), eax, REG_A); + break; + default: + print_cpu_index(lp, n_log_proc); + print_cpuid(cpuidex(eax, 0), eax); + } } - } - /** - * CPUID[1].ECX bit 31 is supposed to indicate whether or not - * a hypervisor is running, but not all hypervisors set it. 
- */ - const uint32_t max_hypervisor_index = cpuid(UINT32_C(0x40000000)).eax; - for (uint32_t eax = UINT32_C(0x40000000); eax <= max_hypervisor_index; eax++) { - switch (eax) { - case UINT32_C(0x40000000): - print_cpuid_string(cpuid(eax), eax, REG_BCD); - break; - case UINT32_C(0x40000001): - print_cpuid_string(cpuid(eax), eax, REG_A); - break; - default: - print_cpuid(cpuidex(eax, 0), eax); + const uint32_t max_extended_index = cpuid(UINT32_C(0x80000000)).eax; + for (uint32_t eax = UINT32_C(0x80000000); eax <= max_extended_index; eax++) { + switch (eax) { + case UINT32_C(0x80000000): + print_cpu_index(lp, n_log_proc); + print_cpuid_string(cpuid(eax), eax, REG_BDC); + break; + case UINT32_C(0x80000002): + case UINT32_C(0x80000003): + case UINT32_C(0x80000004): + print_cpu_index(lp, n_log_proc); + print_cpuid_string(cpuid(eax), eax, REG_ABCD); + break; + default: + print_cpu_index(lp, n_log_proc); + print_cpuid(cpuidex(eax, 0), eax); + } } } - const uint32_t max_extended_index = cpuid(UINT32_C(0x80000000)).eax; - for (uint32_t eax = UINT32_C(0x80000000); eax <= max_extended_index; eax++) { - switch (eax) { - case UINT32_C(0x80000000): - print_cpuid_string(cpuid(eax), eax, REG_BDC); - break; - case UINT32_C(0x80000002): - case UINT32_C(0x80000003): - case UINT32_C(0x80000004): - print_cpuid_string(cpuid(eax), eax, REG_ABCD); - break; - default: - print_cpuid(cpuidex(eax, 0), eax); + if (n_log_proc > 1) { +#ifdef __linux__ + if (sched_setaffinity(0, sizeof mask_default, &mask_default) != 0) { + perror("sched_setaffinity to restore process defaults failed"); + exit(1); } +#endif } } From 9ff9f5153b6a27f9c49515671086844d0894e8a7 Mon Sep 17 00:00:00 2001 From: Daniel Lenski Date: Fri, 21 Nov 2025 16:17:26 -0800 Subject: [PATCH 4/4] Don't try to count number of logical CPUs from CPUID itself This topology enumeration process is very complex 
(https://web.archive.org/web/20160306203252/https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration) and easy to get wrong. We have to rely on the operating system to allow us to switch CPUs, so we may as well rely on it to *count* CPUs as well. --- tools/cpuid-dump.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/tools/cpuid-dump.c b/tools/cpuid-dump.c index 4c87bc68..cb78ae45 100644 --- a/tools/cpuid-dump.c +++ b/tools/cpuid-dump.c @@ -105,28 +105,10 @@ int main(int argc, char** argv) { exit(1); } n_log_proc = CPU_COUNT(&mask_default); -#endif - if (max_base_index >= 1) { - // TODO: handle case of >256 logical CPUs, or multiple packages - const struct cpuid_regs regs = cpuid(1); - uint32_t n_apic_id; - if (regs.edx & UINT32_C(0x10000000)) { // Number of logical CPUs field is valid - n_apic_id = ((regs.ebx & UINT32_C(0x00ff0000)) >> 16); - -#ifdef __linux__ - if (n_apic_id != n_log_proc) - fprintf(stderr, - "WARNING: %d logical CPUs per CPUID.01h.EBX[23:16] != %d per sched_getaffinity()\n", - n_apic_id, - n_log_proc); #else - if (n_apic_id > 1) - fprintf(stderr, - "WARNING: %d logical CPUs per CPUID.01h.EBX, results may vary by logical CPU\n", - n_apic_id); + fprintf(stderr, + "WARNING: results may vary by CPU, core or thread, but switching CPU is unsupported.\n"); #endif - } - } for (uint32_t lp = 0; lp < n_log_proc; lp++) { force_one_cpu(lp, n_log_proc);