@@ -302,6 +302,16 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 	return 0;
 }
 
+static void vmx_cleanup_l1d_flush(void)
+{
+	if (vmx_l1d_flush_pages) {
+		free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
+		vmx_l1d_flush_pages = NULL;
+	}
+	/* Restore state so sysfs ignores VMX */
+	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
+}
+
 static int vmentry_l1d_flush_parse(const char *s)
 {
 	unsigned int i;
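Note: the matching allocation in vmx_setup_l1d_flush() is not part of this hunk. As a reminder of the usual pairing that the cleanup above undoes, here is a minimal, hypothetical sketch; the names demo_pages, DEMO_ORDER, demo_alloc() and demo_free() are invented for illustration, and the only assumption carried over is that a 64 KiB buffer (order 4 with 4 KiB pages) is wanted, as the flush comment in the next hunk requires.

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>

#define DEMO_ORDER	4	/* 16 pages = 64 KiB, assumed buffer size */

static void *demo_pages;	/* hypothetical counterpart of vmx_l1d_flush_pages */

static int demo_alloc(void)
{
	/* Allocate a physically contiguous, order-4 block of pages. */
	struct page *page = alloc_pages(GFP_KERNEL, DEMO_ORDER);

	if (!page)
		return -ENOMEM;
	demo_pages = page_address(page);
	return 0;
}

static void demo_free(void)
{
	/* Free the block and forget the pointer, mirroring the cleanup above. */
	if (demo_pages) {
		free_pages((unsigned long)demo_pages, DEMO_ORDER);
		demo_pages = NULL;
	}
}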
@@ -352,6 +362,83 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
 	return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 }
 
+/*
+ * Software based L1D cache flush which is used when microcode providing
+ * the cache control MSR is not loaded.
+ *
+ * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
+ * flush it is required to read in 64 KiB because the replacement algorithm
+ * is not exactly LRU. This could be sized at runtime via topology
+ * information but as all relevant affected CPUs have 32KiB L1D cache size
+ * there is no point in doing so.
+ */
+static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
+{
+	int size = PAGE_SIZE << L1D_CACHE_ORDER;
+
+	/*
+	 * This code is only executed when the flush mode is 'cond' or
+	 * 'always'
+	 */
+	if (static_branch_likely(&vmx_l1d_flush_cond)) {
+		bool flush_l1d;
+
+		/*
+		 * Clear the per-vcpu flush bit, it gets set again if the vCPU
+		 * is reloaded, i.e. if the vCPU is scheduled out or if KVM
+		 * exits to userspace, or if KVM reaches one of the unsafe
+		 * VMEXIT handlers, e.g. if KVM calls into the emulator.
+		 */
+		flush_l1d = vcpu->arch.l1tf_flush_l1d;
+		vcpu->arch.l1tf_flush_l1d = false;
+
+		/*
+		 * Clear the per-cpu flush bit, it gets set again from
+		 * the interrupt handlers.
+		 */
+		flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
+		kvm_clear_cpu_l1tf_flush_l1d();
+
+		if (!flush_l1d)
+			return;
+	}
+
+	vcpu->stat.l1d_flush++;
+
+	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
+		native_wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+		return;
+	}
+
+	asm volatile(
+		/* First ensure the pages are in the TLB */
+		"xorl %%eax, %%eax\n"
+		".Lpopulate_tlb:\n\t"
+		"movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
+		"addl $4096, %%eax\n\t"
+		"cmpl %%eax, %[size]\n\t"
+		"jne .Lpopulate_tlb\n\t"
+		"xorl %%eax, %%eax\n\t"
+		"cpuid\n\t"
+		/* Now fill the cache */
+		"xorl %%eax, %%eax\n"
+		".Lfill_cache:\n"
+		"movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
+		"addl $64, %%eax\n\t"
+		"cmpl %%eax, %[size]\n\t"
+		"jne .Lfill_cache\n\t"
+		"lfence\n"
+		:: [flush_pages] "r" (vmx_l1d_flush_pages),
+		    [size] "r" (size)
+		: "eax", "ebx", "ecx", "edx");
+}
+
+static const struct kernel_param_ops vmentry_l1d_flush_ops = {
+	.set = vmentry_l1d_flush_set,
+	.get = vmentry_l1d_flush_get,
+};
+module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
+
 static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
 {
 	u64 msr;
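Note: when the IA32_FLUSH_CMD MSR is not available, the fallback added above flushes the L1D by reading a dedicated buffer twice, once per 4 KiB page to populate the TLB and once per 64-byte line to displace the cache contents. The sketch below is only a rough user-space approximation of that access pattern, assuming a 64 KiB buffer; the SOFT_FLUSH_SIZE macro and the function and variable names are invented, and the real code additionally serializes with cpuid and lfence in inline asm, which plain C cannot reproduce.

#include <stdint.h>
#include <stdlib.h>

#define SOFT_FLUSH_SIZE	(64 * 1024)	/* assumed: 2 * 32 KiB L1D, as in the comment above */

static volatile uint8_t sink;		/* keeps the loads from being optimized away */

static void soft_l1d_flush(const uint8_t *buf)
{
	size_t i;

	/* First pass: touch one byte per 4 KiB page so the TLB is populated. */
	for (i = 0; i < SOFT_FLUSH_SIZE; i += 4096)
		sink = buf[i];

	/* Second pass: read one byte per 64-byte line to fill the L1D. */
	for (i = 0; i < SOFT_FLUSH_SIZE; i += 64)
		sink = buf[i];
}

int main(void)
{
	uint8_t *buf = aligned_alloc(4096, SOFT_FLUSH_SIZE);

	if (!buf)
		return 1;
	soft_l1d_flush(buf);
	free(buf);
	return 0;
}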
@@ -404,12 +491,6 @@ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 	vmx->disable_fb_clear = false;
 }
 
-static const struct kernel_param_ops vmentry_l1d_flush_ops = {
-	.set = vmentry_l1d_flush_set,
-	.get = vmentry_l1d_flush_get,
-};
-module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
-
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
 
 void vmx_vmexit(void);
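Note: this hunk removes the old location of the vmentry_l1d_flush registration, which the earlier hunk re-added next to its get/set handlers. For context, a minimal, hypothetical sketch of the same module_param_cb()/kernel_param_ops pattern follows; the demo_mode parameter and its 0-2 range check are invented for illustration.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sysfs.h>

static unsigned int demo_mode;	/* hypothetical backing variable */

static int demo_mode_set(const char *val, const struct kernel_param *kp)
{
	unsigned int v;
	int ret = kstrtouint(val, 0, &v);

	if (ret)
		return ret;
	if (v > 2)		/* illustrative: accept only 0, 1 or 2 */
		return -EINVAL;
	demo_mode = v;
	return 0;
}

static int demo_mode_get(char *buf, const struct kernel_param *kp)
{
	return sysfs_emit(buf, "%u\n", demo_mode);
}

static const struct kernel_param_ops demo_mode_ops = {
	.set = demo_mode_set,
	.get = demo_mode_get,
};
module_param_cb(demo_mode, &demo_mode_ops, NULL, 0644);

MODULE_DESCRIPTION("sketch of module_param_cb with custom kernel_param_ops");
MODULE_LICENSE("GPL");

With 0644 permissions the value is then readable and writable at runtime through /sys/module/<module>/parameters/demo_mode, which is how vmentry_l1d_flush itself is exposed.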
@@ -6646,77 +6727,6 @@ int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 	return ret;
 }
 
-/*
- * Software based L1D cache flush which is used when microcode providing
- * the cache control MSR is not loaded.
- *
- * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
- * flush it is required to read in 64 KiB because the replacement algorithm
- * is not exactly LRU. This could be sized at runtime via topology
- * information but as all relevant affected CPUs have 32KiB L1D cache size
- * there is no point in doing so.
- */
-static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
-{
-	int size = PAGE_SIZE << L1D_CACHE_ORDER;
-
-	/*
-	 * This code is only executed when the flush mode is 'cond' or
-	 * 'always'
-	 */
-	if (static_branch_likely(&vmx_l1d_flush_cond)) {
-		bool flush_l1d;
-
-		/*
-		 * Clear the per-vcpu flush bit, it gets set again if the vCPU
-		 * is reloaded, i.e. if the vCPU is scheduled out or if KVM
-		 * exits to userspace, or if KVM reaches one of the unsafe
-		 * VMEXIT handlers, e.g. if KVM calls into the emulator.
-		 */
-		flush_l1d = vcpu->arch.l1tf_flush_l1d;
-		vcpu->arch.l1tf_flush_l1d = false;
-
-		/*
-		 * Clear the per-cpu flush bit, it gets set again from
-		 * the interrupt handlers.
-		 */
-		flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
-		kvm_clear_cpu_l1tf_flush_l1d();
-
-		if (!flush_l1d)
-			return;
-	}
-
-	vcpu->stat.l1d_flush++;
-
-	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
-		native_wrmsrq(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
-		return;
-	}
-
-	asm volatile(
-		/* First ensure the pages are in the TLB */
-		"xorl %%eax, %%eax\n"
-		".Lpopulate_tlb:\n\t"
-		"movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
-		"addl $4096, %%eax\n\t"
-		"cmpl %%eax, %[size]\n\t"
-		"jne .Lpopulate_tlb\n\t"
-		"xorl %%eax, %%eax\n\t"
-		"cpuid\n\t"
-		/* Now fill the cache */
-		"xorl %%eax, %%eax\n"
-		".Lfill_cache:\n"
-		"movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
-		"addl $64, %%eax\n\t"
-		"cmpl %%eax, %[size]\n\t"
-		"jne .Lfill_cache\n\t"
-		"lfence\n"
-		:: [flush_pages] "r" (vmx_l1d_flush_pages),
-		    [size] "r" (size)
-		: "eax", "ebx", "ecx", "edx");
-}
-
 void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -8651,16 +8661,6 @@ __init int vmx_hardware_setup(void)
 	return r;
 }
 
-static void vmx_cleanup_l1d_flush(void)
-{
-	if (vmx_l1d_flush_pages) {
-		free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
-		vmx_l1d_flush_pages = NULL;
-	}
-	/* Restore state so sysfs ignores VMX */
-	l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
-}
-
 void vmx_exit(void)
 {
 	allow_smaller_maxphyaddr = false;