Commit 7649412

KVM: x86: Load guest/host PKRU outside of the fastpath run loop
Move KVM's swapping of PKRU outside of the fastpath loop, as there is no KVM code anywhere in the fastpath that accesses guest/userspace memory, i.e. that can consume protection keys. As documented by commit 1be0e61 ("KVM, pkeys: save/restore PKRU when guest/host switches"), KVM just needs to ensure the host's PKRU is loaded when KVM (or the kernel at-large) may access userspace memory.

And at the time of commit 1be0e61, KVM didn't have a fastpath, and PKU was strictly contained to VMX, i.e. there was no reason to swap PKRU outside of vmx_vcpu_run().

Over time, the "need" to swap PKRU close to VM-Enter was likely falsely solidified by the association with XFEATUREs in commit 3748613 ("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c"), and XFEATURE swapping was in turn moved close to VM-Enter/VM-Exit as a KVM hack-a-fix solution for an #MC handler bug by commit 1811d97 ("x86/kvm: move kvm_load/put_guest_xcr0 into atomic context").

Deferring the PKRU loads shaves ~40 cycles off the fastpath for Intel, and ~60 cycles for AMD. E.g. using INVD in KVM-Unit-Test's vmexit.c, with extra hacks to enable CR4.PKE and PKRU=(-1u & ~0x3), latency numbers for AMD Turin go from ~1560 => ~1500, and for Intel Emerald Rapids, go from ~810 => ~770.

Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: Jon Kohler <jon@nutanix.com>
Link: https://patch.msgid.link/20251118222328.2265758-5-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
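In effect, the tail of vcpu_enter_guest() now has the following shape (a condensed sketch pieced together from the x86.c hunks below; the kvm_x86_call(vcpu_run) dispatch and the loop-exit condition are abridged assumptions, not verbatim kernel source):

	guest_timing_enter_irqoff();

	/* Load the guest's PKRU once, before entering the fastpath run loop. */
	kvm_load_guest_pkru(vcpu);

	for (;;) {
		/*
		 * The fastpath never touches guest/userspace memory, so the
		 * host's PKRU is not needed while looping.
		 */
		exit_fastpath = kvm_x86_call(vcpu_run)(vcpu, run_flags);
		if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
			break;
	}

	/* Restore the host's PKRU once, before any userspace access. */
	kvm_load_host_pkru(vcpu);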
1 parent 75c69c8 commit 7649412

4 files changed, 10 insertions(+), 12 deletions(-)
arch/x86/kvm/svm/svm.c

Lines changed: 0 additions & 2 deletions

@@ -4250,7 +4250,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 	svm_set_dr6(vcpu, DR6_ACTIVE_LOW);
 
 	clgi();
-	kvm_load_guest_xsave_state(vcpu);
 
 	/*
 	 * Hardware only context switches DEBUGCTL if LBR virtualization is
@@ -4293,7 +4292,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 	    vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
 		update_debugctlmsr(vcpu->arch.host_debugctl);
 
-	kvm_load_host_xsave_state(vcpu);
 	stgi();
 
 	/* Any pending NMI will happen here */

arch/x86/kvm/vmx/vmx.c

Lines changed: 0 additions & 4 deletions

@@ -7473,8 +7473,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
-	kvm_load_guest_xsave_state(vcpu);
-
 	pt_guest_enter(vmx);
 
 	atomic_switch_perf_msrs(vmx);
@@ -7518,8 +7516,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 
 	pt_guest_exit(vmx);
 
-	kvm_load_host_xsave_state(vcpu);
-
 	if (is_guest_mode(vcpu)) {
 		/*
 		 * Track VMLAUNCH/VMRESUME that have made past guest state

arch/x86/kvm/x86.c

Lines changed: 10 additions & 4 deletions

@@ -1235,7 +1235,7 @@ static void kvm_load_host_xfeatures(struct kvm_vcpu *vcpu)
 	}
 }
 
-void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
+static void kvm_load_guest_pkru(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.guest_state_protected)
 		return;
@@ -1246,9 +1246,8 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
 	     kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
 		wrpkru(vcpu->arch.pkru);
 }
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_guest_xsave_state);
 
-void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
+static void kvm_load_host_pkru(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.guest_state_protected)
 		return;
@@ -1261,7 +1260,6 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
 		wrpkru(vcpu->arch.host_pkru);
 	}
 }
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_host_xsave_state);
 
 #ifdef CONFIG_X86_64
 static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
@@ -11303,6 +11301,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	guest_timing_enter_irqoff();
 
+	/*
+	 * Swap PKRU with hardware breakpoints disabled to minimize the number
+	 * of flows where non-KVM code can run with guest state loaded.
+	 */
+	kvm_load_guest_pkru(vcpu);
+
 	for (;;) {
 		/*
 		 * Assert that vCPU vs. VM APICv state is consistent.  An APICv
@@ -11331,6 +11335,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		++vcpu->stat.exits;
 	}
 
+	kvm_load_host_pkru(vcpu);
+
 	/*
 	 * Do this here before restoring debug registers on the host.  And
 	 * since we do this before handling the vmexit, a DR access vmexit
arch/x86/kvm/x86.h

Lines changed: 0 additions & 2 deletions

@@ -636,8 +636,6 @@ static inline void kvm_machine_check(void)
 #endif
 }
 
-void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
-void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
 int kvm_spec_ctrl_test_value(u64 value);
 int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
 			      struct x86_exception *e);
