diff --git a/docs/hyperv.txt b/docs/hyperv.txt index e53c581f4586..000638a2fd38 100644 --- a/docs/hyperv.txt +++ b/docs/hyperv.txt @@ -170,7 +170,7 @@ Recommended: hv-frequencies 3.16. hv-evmcs =============== The enlightenment is nested specific, it targets Hyper-V on KVM guests. When -enabled, it provides Enlightened VMCS feature to the guest. The feature +enabled, it provides Enlightened VMCS version 1 feature to the guest. The feature implements paravirtualized protocol between L0 (KVM) and L1 (Hyper-V) hypervisors making L2 exits to the hypervisor faster. The feature is Intel-only. Note: some virtualization features (e.g. Posted Interrupts) are disabled when @@ -209,8 +209,11 @@ In some cases (e.g. during development) it may make sense to use QEMU in 'pass-through' mode and give Windows guests all enlightenments currently supported by KVM. This pass-through mode is enabled by "hv-passthrough" CPU flag. -Note: enabling this flag effectively prevents migration as supported features -may differ between target and destination. +Note: "hv-passthrough" flag only enables enlightenments which are known to QEMU +(have corresponding "hv-*" flag) and copies "hv-spinlocks="/"hv-vendor-id=" +values from KVM to QEMU. "hv-passthrough" overrides all other "hv-*" settings on +the command line. Also, enabling this flag effectively prevents migration as the +list of enabled enlightenments may differ between target and destination hosts. 4. 
Useful links diff --git a/hw/core/machine.c b/hw/core/machine.c index 57c18f909ab0..6f59fb0b7f2c 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -728,7 +728,8 @@ void machine_set_cpu_numa_node(MachineState *machine, if ((numa_info[props->node_id].initiator < MAX_NODES) && (props->node_id != numa_info[props->node_id].initiator)) { error_setg(errp, "The initiator of CPU NUMA node %" PRId64 - " should be itself", props->node_id); + " should be itself (got %" PRIu16 ")", + props->node_id, numa_info[props->node_id].initiator); return; } numa_info[props->node_id].has_cpu = true; diff --git a/hw/core/numa.c b/hw/core/numa.c index 1058d3697b15..510d096a8886 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -88,6 +88,29 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, return; } + /* + * If not set the initiator, set it to MAX_NODES. And if + * HMAT is enabled and this node has no cpus, QEMU will raise error. + */ + numa_info[nodenr].initiator = MAX_NODES; + if (node->has_initiator) { + if (!ms->numa_state->hmat_enabled) { + error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " + "(HMAT) is disabled, enable it with -machine hmat=on " + "before using any of hmat specific options"); + return; + } + + if (node->initiator >= MAX_NODES) { + error_report("The initiator id %" PRIu16 " expects an integer " + "between 0 and %d", node->initiator, + MAX_NODES - 1); + return; + } + + numa_info[nodenr].initiator = node->initiator; + } + for (cpus = node->cpus; cpus; cpus = cpus->next) { CpuInstanceProperties props; if (cpus->value >= max_cpus) { @@ -142,28 +165,6 @@ static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); } - /* - * If not set the initiator, set it to MAX_NODES. And if - * HMAT is enabled and this node has no cpus, QEMU will raise error. 
- */ - numa_info[nodenr].initiator = MAX_NODES; - if (node->has_initiator) { - if (!ms->numa_state->hmat_enabled) { - error_setg(errp, "ACPI Heterogeneous Memory Attribute Table " - "(HMAT) is disabled, enable it with -machine hmat=on " - "before using any of hmat specific options"); - return; - } - - if (node->initiator >= MAX_NODES) { - error_report("The initiator id %" PRIu16 " expects an integer " - "between 0 and %d", node->initiator, - MAX_NODES - 1); - return; - } - - numa_info[nodenr].initiator = node->initiator; - } numa_info[nodenr].present = true; max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); ms->numa_state->num_nodes++; diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 8e1220db7285..aa79c5e0e6ff 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -98,6 +98,7 @@ GlobalProperty pc_compat_6_0[] = { { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, + { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, }; const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 5f595a0d7e20..48b55ebd0a67 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5155,6 +5155,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (cpu->cache_info_passthrough) { host_cpuid(index, 0, eax, ebx, ecx, edx); break; + } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; + break; } *eax = 1; /* Number of CPUID[EAX=2] calls required */ *ebx = 0; @@ -5176,6 +5179,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if ((*eax & 31) && cs->nr_cores > 1) { *eax |= (cs->nr_cores - 1) << 26; } + } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; } else { *eax = 0; switch (count) { @@ -5945,8 +5950,15 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } - /* CPU topology with multi-dies support requires CPUID[0x1F] */ - if 
(env->nr_dies > 1) { + /* + * Intel CPU topology with multi-dies support requires CPUID[0x1F]. + * For AMD Rome/Milan, cpuid level is 0x10, and guest OS should detect + * extended topology by leaf 0xB. Only adjust it for Intel CPU, unless + * cpu->vendor_cpuid_only has been unset for compatibility with older + * machine types. + */ + if ((env->nr_dies > 1) && + (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } @@ -5974,6 +5986,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) if (env->cpuid_xlevel2 == UINT32_MAX) { env->cpuid_xlevel2 = env->cpuid_min_xlevel2; } + + if (kvm_enabled()) { + kvm_hyperv_expand_features(cpu, errp); + } } /* @@ -6647,6 +6663,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), + DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), DEFINE_PROP_BOOL("kvm-no-smi-migration", X86CPU, kvm_no_smi_migration, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8f3747dd2858..950a991a71cb 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1748,6 +1748,9 @@ struct X86CPU { /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; + /* Only advertise CPUID leaves defined by the vendor */ + bool vendor_cpuid_only; + /* Enable auto level-increase for Intel Processor Trace leave */ bool intel_pt_auto_level; diff --git a/target/i386/kvm/hyperv-proto.h b/target/i386/kvm/hyperv-proto.h index e30d64b4ade4..5fbb385cc136 100644 --- a/target/i386/kvm/hyperv-proto.h +++ b/target/i386/kvm/hyperv-proto.h @@ -38,6 +38,12 @@ #define HV_ACCESS_FREQUENCY_MSRS (1u << 11) #define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13) +/* + * 
HV_CPUID_FEATURES.EBX bits + */ +#define HV_POST_MESSAGES (1u << 4) +#define HV_SIGNAL_EVENTS (1u << 5) + /* * HV_CPUID_FEATURES.EDX bits */ diff --git a/target/i386/kvm/kvm-stub.c b/target/i386/kvm/kvm-stub.c index 92f49121b8fa..f6e7e4466e1a 100644 --- a/target/i386/kvm/kvm-stub.c +++ b/target/i386/kvm/kvm-stub.c @@ -39,3 +39,8 @@ bool kvm_hv_vpindex_settable(void) { return false; } + +bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp) +{ + abort(); +} diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a85035492fba..59ed8327ac13 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -813,8 +813,6 @@ static struct { [HYPERV_FEAT_RELAXED] = { .desc = "relaxed timing (hv-relaxed)", .flags = { - {.func = HV_CPUID_FEATURES, .reg = R_EAX, - .bits = HV_HYPERCALL_AVAILABLE}, {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, .bits = HV_RELAXED_TIMING_RECOMMENDED} } @@ -823,7 +821,7 @@ static struct { .desc = "virtual APIC (hv-vapic)", .flags = { {.func = HV_CPUID_FEATURES, .reg = R_EAX, - .bits = HV_HYPERCALL_AVAILABLE | HV_APIC_ACCESS_AVAILABLE}, + .bits = HV_APIC_ACCESS_AVAILABLE}, {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, .bits = HV_APIC_ACCESS_RECOMMENDED} } @@ -832,8 +830,7 @@ static struct { .desc = "clocksources (hv-time)", .flags = { {.func = HV_CPUID_FEATURES, .reg = R_EAX, - .bits = HV_HYPERCALL_AVAILABLE | HV_TIME_REF_COUNT_AVAILABLE | - HV_REFERENCE_TSC_AVAILABLE} + .bits = HV_TIME_REF_COUNT_AVAILABLE | HV_REFERENCE_TSC_AVAILABLE} } }, [HYPERV_FEAT_CRASH] = { @@ -1148,16 +1145,12 @@ static bool hyperv_feature_supported(CPUState *cs, int feature) return true; } -static int hv_cpuid_check_and_set(CPUState *cs, int feature, Error **errp) +/* Checks that all feature dependencies are enabled */ +static bool hv_feature_check_deps(X86CPU *cpu, int feature, Error **errp) { - X86CPU *cpu = X86_CPU(cs); uint64_t deps; int dep_feat; - if (!hyperv_feat_enabled(cpu, feature) && !cpu->hyperv_passthrough) { - return 0; - } - deps = 
kvm_hyperv_properties[feature].dependencies; while (deps) { dep_feat = ctz64(deps); @@ -1165,26 +1158,12 @@ static int hv_cpuid_check_and_set(CPUState *cs, int feature, Error **errp) error_setg(errp, "Hyper-V %s requires Hyper-V %s", kvm_hyperv_properties[feature].desc, kvm_hyperv_properties[dep_feat].desc); - return 1; + return false; } deps &= ~(1ull << dep_feat); } - if (!hyperv_feature_supported(cs, feature)) { - if (hyperv_feat_enabled(cpu, feature)) { - error_setg(errp, "Hyper-V %s is not supported by kernel", - kvm_hyperv_properties[feature].desc); - return 1; - } else { - return 0; - } - } - - if (cpu->hyperv_passthrough) { - cpu->hyperv_features |= BIT(feature); - } - - return 0; + return true; } static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg) @@ -1220,12 +1199,23 @@ static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg) * of 'hv_passthrough' mode and fills the environment with all supported * Hyper-V features. */ -static void hyperv_expand_features(CPUState *cs, Error **errp) +bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp) { - X86CPU *cpu = X86_CPU(cs); + CPUState *cs = CPU(cpu); + Error *local_err = NULL; + int feat; if (!hyperv_enabled(cpu)) - return; + return true; + + /* + * When kvm_hyperv_expand_features is called at CPU feature expansion + * time per-CPU kvm_state is not available yet so we can only proceed + * when KVM_CAP_SYS_HYPERV_CPUID is supported. 
+ */ + if (!cs->kvm_state && + !kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID)) + return true; if (cpu->hyperv_passthrough) { cpu->hyperv_vendor_id[0] = @@ -1269,53 +1259,37 @@ static void hyperv_expand_features(CPUState *cs, Error **errp) cpu->hyperv_spinlock_attempts = hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EBX); - } - /* Features */ - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_RELAXED, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_VAPIC, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_TIME, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_CRASH, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_RESET, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_VPINDEX, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_RUNTIME, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_SYNIC, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_STIMER, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_FREQUENCIES, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_REENLIGHTENMENT, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_TLBFLUSH, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_EVMCS, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_IPI, errp)) { - return; - } - if (hv_cpuid_check_and_set(cs, HYPERV_FEAT_STIMER_DIRECT, errp)) { - return; + /* + * Mark feature as enabled in 'cpu->hyperv_features' as + * hv_build_cpuid_leaf() uses this info to build guest CPUIDs. + */ + for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) { + if (hyperv_feature_supported(cs, feat)) { + cpu->hyperv_features |= BIT(feat); + } + } + } else { + /* Check features availability and dependencies */ + for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) { + /* If the feature was not requested skip it. 
*/ + if (!hyperv_feat_enabled(cpu, feat)) { + continue; + } + + /* Check if the feature is supported by KVM */ + if (!hyperv_feature_supported(cs, feat)) { + error_setg(errp, "Hyper-V %s is not supported by kernel", + kvm_hyperv_properties[feat].desc); + return false; + } + + /* Check dependencies */ + if (!hv_feature_check_deps(cpu, feat, &local_err)) { + error_propagate(errp, local_err); + return false; + } + } } /* Additional dependencies not covered by kvm_hyperv_properties[] */ @@ -1325,7 +1299,10 @@ static void hyperv_expand_features(CPUState *cs, Error **errp) error_setg(errp, "Hyper-V %s requires Hyper-V %s", kvm_hyperv_properties[HYPERV_FEAT_SYNIC].desc, kvm_hyperv_properties[HYPERV_FEAT_VPINDEX].desc); + return false; } + + return true; } /* @@ -1366,6 +1343,15 @@ static int hyperv_fill_cpuids(CPUState *cs, c->ebx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EBX); c->edx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EDX); + /* Unconditionally required with any Hyper-V enlightenment */ + c->eax |= HV_HYPERCALL_AVAILABLE; + + /* SynIC and Vmbus devices require messages/signals hypercalls */ + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) && + !cpu->hyperv_synic_kvm_only) { + c->ebx |= HV_POST_MESSAGES | HV_SIGNAL_EVENTS; + } + /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ c->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; @@ -1409,6 +1395,21 @@ static int hyperv_fill_cpuids(CPUState *cs, static Error *hv_passthrough_mig_blocker; static Error *hv_no_nonarch_cs_mig_blocker; +/* Checks that the exposed eVMCS version range is supported by KVM */ +static bool evmcs_version_supported(uint16_t evmcs_version, + uint16_t supported_evmcs_version) +{ + uint8_t min_version = evmcs_version & 0xff; + uint8_t max_version = evmcs_version >> 8; + uint8_t min_supported_version = supported_evmcs_version & 0xff; + uint8_t max_supported_version = supported_evmcs_version >> 8; + + return (min_version >= min_supported_version) && + (max_version <= 
max_supported_version); +} + +#define DEFAULT_EVMCS_VERSION ((1 << 8) | 1) + static int hyperv_init_vcpu(X86CPU *cpu) { CPUState *cs = CPU(cpu); @@ -1488,17 +1489,33 @@ static int hyperv_init_vcpu(X86CPU *cpu) } if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) { - uint16_t evmcs_version; + uint16_t evmcs_version = DEFAULT_EVMCS_VERSION; + uint16_t supported_evmcs_version; ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, - (uintptr_t)&evmcs_version); + (uintptr_t)&supported_evmcs_version); + /* + * KVM is required to support EVMCS ver.1 as that's what 'hv-evmcs' + * option sets. Note: we hardcode the maximum supported eVMCS version + * to '1' as well so 'hv-evmcs' feature is migratable even when (and if) + * ver.2 is implemented. A new option (e.g. 'hv-evmcs=2') will then have + * to be added. + */ if (ret < 0) { - fprintf(stderr, "Hyper-V %s is not supported by kernel\n", - kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc); + error_report("Hyper-V %s is not supported by kernel", + kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc); return ret; } + if (!evmcs_version_supported(evmcs_version, supported_evmcs_version)) { + error_report("eVMCS version range [%d..%d] is not supported by " + "kernel (supported: [%d..%d])", evmcs_version & 0xff, + evmcs_version >> 8, supported_evmcs_version & 0xff, + supported_evmcs_version >> 8); + return -ENOTSUP; + } + cpu->hyperv_nested[0] = evmcs_version; } @@ -1559,9 +1576,15 @@ int kvm_arch_init_vcpu(CPUState *cs) env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; - /* Paravirtualization CPUIDs */ - hyperv_expand_features(cs, &local_err); - if (local_err) { + /* + * kvm_hyperv_expand_features() is called here for the second time in case + * KVM_CAP_SYS_HYPERV_CPUID is not supported. 
While we can't possibly handle + * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to + * check which Hyper-V enlightenments are supported and which are not, we + * can still proceed and check/expand Hyper-V enlightenments here so legacy + * behavior is preserved. + */ + if (!kvm_hyperv_expand_features(cpu, &local_err)) { error_report_err(local_err); return -ENOSYS; } diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index dc725083891c..54667b35f09c 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -47,6 +47,7 @@ bool kvm_has_x2apic_api(void); bool kvm_has_waitpkg(void); bool kvm_hv_vpindex_settable(void); +bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);