Skip to content

Commit d6581b6

Browse files
Szy0127 authored and sean-jc committed
KVM: SVM: Flush cache only on CPUs running SEV guest
On AMD CPUs without ensuring cache consistency, each memory page reclamation in an SEV guest triggers a call to do WBNOINVD/WBINVD on all CPUs, thereby affecting the performance of other programs on the host. Typically, an AMD server may have 128 cores or more, while the SEV guest might only utilize 8 of these cores. Meanwhile, host can use qemu-affinity to bind these 8 vCPUs to specific physical CPUs. Therefore, keeping a record of the physical core numbers each time a vCPU runs can help avoid flushing the cache for all CPUs every time. Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com> Signed-off-by: Zheyun Shen <szy0127@sjtu.edu.cn> Co-developed-by: Sean Christopherson <seanjc@google.com> Link: https://lore.kernel.org/r/20250522233733.3176144-9-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent a77896e commit d6581b6

File tree

2 files changed

+40
-7
lines changed

2 files changed

+40
-7
lines changed

arch/x86/kvm/svm/sev.c

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
447447
init_args.probe = false;
448448
ret = sev_platform_init(&init_args);
449449
if (ret)
450-
goto e_free;
450+
goto e_free_asid;
451+
452+
if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
453+
ret = -ENOMEM;
454+
goto e_free_asid;
455+
}
451456

452457
/* This needs to happen after SEV/SNP firmware initialization. */
453458
if (vm_type == KVM_X86_SNP_VM) {
@@ -465,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
465470
return 0;
466471

467472
e_free:
473+
free_cpumask_var(sev->have_run_cpus);
474+
e_free_asid:
468475
argp->error = init_args.error;
469476
sev_asid_free(sev);
470477
sev->asid = 0;
@@ -709,16 +716,31 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
709716
}
710717
}
711718

712-
static void sev_writeback_caches(void)
719+
static void sev_writeback_caches(struct kvm *kvm)
713720
{
721+
/*
722+
* Note, the caller is responsible for ensuring correctness if the mask
723+
* can be modified, e.g. if a CPU could be doing VMRUN.
724+
*/
725+
if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
726+
return;
727+
714728
/*
715729
* Ensure that all dirty guest tagged cache entries are written back
716730
* before releasing the pages back to the system for use. CLFLUSH will
717731
* not do this without SME_COHERENT, and flushing many cache lines
718732
* individually is slower than blasting WBINVD for large VMs, so issue
719-
* WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported).
733+
* WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
734+
* on CPUs that have done VMRUN, i.e. may have dirtied data using the
735+
* VM's ASID.
736+
*
737+
* For simplicity, never remove CPUs from the bitmap. Ideally, KVM
738+
* would clear the mask when flushing caches, but doing so requires
739+
* serializing multiple calls and having responding CPUs (to the IPI)
740+
* mark themselves as still running if they are running (or about to
741+
* run) a vCPU for the VM.
720742
*/
721-
wbnoinvd_on_all_cpus();
743+
wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
722744
}
723745

724746
static unsigned long get_num_contig_pages(unsigned long idx,
@@ -2707,7 +2729,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
27072729
goto failed;
27082730
}
27092731

2710-
sev_writeback_caches();
2732+
sev_writeback_caches(kvm);
27112733

27122734
__unregister_enc_region_locked(kvm, region);
27132735

@@ -2855,6 +2877,7 @@ void sev_vm_destroy(struct kvm *kvm)
28552877
}
28562878

28572879
sev_asid_free(sev);
2880+
free_cpumask_var(sev->have_run_cpus);
28582881
}
28592882

28602883
void __init sev_set_cpu_caps(void)
@@ -3106,7 +3129,7 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
31063129
return;
31073130

31083131
do_sev_writeback_caches:
3109-
sev_writeback_caches();
3132+
sev_writeback_caches(vcpu->kvm);
31103133
}
31113134

31123135
void sev_guest_memory_reclaimed(struct kvm *kvm)
@@ -3119,7 +3142,7 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
31193142
if (!sev_guest(kvm) || sev_snp_guest(kvm))
31203143
return;
31213144

3122-
sev_writeback_caches();
3145+
sev_writeback_caches(kvm);
31233146
}
31243147

31253148
void sev_free_vcpu(struct kvm_vcpu *vcpu)
@@ -3451,6 +3474,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
34513474
if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
34523475
return -EINVAL;
34533476

3477+
/*
3478+
* To optimize cache flushes when memory is reclaimed from an SEV VM,
3479+
* track physical CPUs that enter the guest for SEV VMs and thus can
3480+
* have encrypted, dirty data in the cache, and flush caches only for
3481+
* CPUs that have entered the guest.
3482+
*/
3483+
if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
3484+
cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
3485+
34543486
/* Assign the asid allocated with this SEV guest */
34553487
svm->asid = asid;
34563488

arch/x86/kvm/svm/svm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ struct kvm_sev_info {
113113
void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */
114114
void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */
115115
struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
116+
cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
116117
};
117118

118119
#define SEV_POLICY_NODBG BIT_ULL(0)

0 commit comments

Comments (0)