Skip to content

Commit 26044af

Browse files
committed
x86/crash: Disable virt in core NMI crash handler to avoid double shootdown
Disable virtualization in crash_nmi_callback() and rework the
emergency_vmx_disable_all() path to do an NMI shootdown if and only if a
shootdown has not already occurred.  NMI crash shootdown fundamentally
can't support multiple invocations as responding CPUs are deliberately
put into halt state without unblocking NMIs.  But, the emergency reboot
path doesn't have any work of its own, it simply cares about disabling
virtualization, i.e. so long as a shootdown occurred, emergency reboot
doesn't care who initiated the shootdown, or when.

If "crash_kexec_post_notifiers" is specified on the kernel command line,
panic() will invoke crash_smp_send_stop() and result in a second call to
nmi_shootdown_cpus() during native_machine_emergency_restart().

Invoke the callback _before_ disabling virtualization, as the current
VMCS needs to be cleared before doing VMXOFF.  Note, this results in a
subtle change in ordering between disabling virtualization and stopping
Intel PT on the responding CPUs.  While VMX and Intel PT do interact,
VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one
another, which is all that matters when panicking.

Harden nmi_shootdown_cpus() against multiple invocations to try and
capture any such kernel bugs via a WARN instead of hanging the system
during a crash/dump, e.g. prior to the recent hardening of
register_nmi_handler(), re-registering the NMI handler would trigger a
double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y.

  list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800.
  WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70
  Call Trace:
   __register_nmi_handler+0xcf/0x130
   nmi_shootdown_cpus+0x39/0x90
   native_machine_emergency_restart+0x1c9/0x1d0
   panic+0x237/0x29b

Extract the disabling logic to a common helper to deduplicate code, and
to prepare for doing the shootdown in the emergency reboot path if SVM
is supported.
Note, prior to commit ed72736 ("x86/reboot: Force all cpus to exit VMX
root if VMX is supported"), nmi_shootdown_cpus() was subtly protected
against a second invocation by a cpu_vmx_enabled() check as the kdump
handler would disable VMX if it ran first.

Fixes: ed72736 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
Cc: stable@vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent f422f85 commit 26044af

File tree

3 files changed

+56
-28
lines changed

3 files changed

+56
-28
lines changed

arch/x86/include/asm/reboot.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
 #define MRR_BIOS	0
 #define MRR_APM		1
 
+void cpu_emergency_disable_virtualization(void);
+
 typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
 void nmi_panic_self_stop(struct pt_regs *regs);
 void nmi_shootdown_cpus(nmi_shootdown_cb callback);

arch/x86/kernel/crash.c

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
 #include <linux/kdebug.h>
 #include <asm/cpu.h>
 #include <asm/reboot.h>
-#include <asm/virtext.h>
 #include <asm/intel_pt.h>
 #include <asm/crash.h>
 #include <asm/cmdline.h>
@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 	 */
 	cpu_crash_vmclear_loaded_vmcss();
 
-	/* Disable VMX or SVM if needed.
-	 *
-	 * We need to disable virtualization on all CPUs.
-	 * Having VMX or SVM enabled on any CPU may break rebooting
-	 * after the kdump kernel has finished its task.
-	 */
-	cpu_emergency_vmxoff();
-	cpu_emergency_svm_disable();
-
 	/*
 	 * Disable Intel PT to stop its logging
 	 */
@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	 */
 	cpu_crash_vmclear_loaded_vmcss();
 
-	/* Booting kdump kernel with VMX or SVM enabled won't work,
-	 * because (among other limitations) we can't disable paging
-	 * with the virt flags.
-	 */
-	cpu_emergency_vmxoff();
-	cpu_emergency_svm_disable();
+	cpu_emergency_disable_virtualization();
 
 	/*
 	 * Disable Intel PT to stop its logging

arch/x86/kernel/reboot.c

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -528,10 +528,7 @@ static inline void kb_wait(void)
 	}
 }
 
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
-	cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
 
 /* Use NMIs as IPIs to tell all CPUs to disable virtualization */
 static void emergency_vmx_disable_all(void)
@@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(void)
 		__cpu_emergency_vmxoff();
 
 		/* Halt and exit VMX root operation on the other CPUs. */
-		nmi_shootdown_cpus(vmxoff_nmi);
+		nmi_shootdown_cpus_on_restart();
 	}
 }
 
@@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
 /* This is the CPU performing the emergency shutdown work. */
 int crashing_cpu = -1;
 
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+	cpu_emergency_vmxoff();
+	cpu_emergency_svm_disable();
+}
+
 #if defined(CONFIG_SMP)
 
 static nmi_shootdown_cb shootdown_callback;
@@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
 		return NMI_HANDLED;
 	local_irq_disable();
 
-	shootdown_callback(cpu, regs);
+	if (shootdown_callback)
+		shootdown_callback(cpu, regs);
+
+	/*
+	 * Prepare the CPU for reboot _after_ invoking the callback so that the
+	 * callback can safely use virtualization instructions, e.g. VMCLEAR.
+	 */
+	cpu_emergency_disable_virtualization();
 
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
@@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
 	return NMI_HANDLED;
 }
 
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback:	Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
  *
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
  */
 void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 {
 	unsigned long msecs;
+
 	local_irq_disable();
 
+	/*
+	 * Avoid certain doom if a shootdown already occurred; re-registering
+	 * the NMI handler will cause list corruption, modifying the callback
+	 * will do who knows what, etc...
+	 */
+	if (WARN_ON_ONCE(crash_ipi_issued))
+		return;
+
 	/* Make a note of crashing cpu. Will be used in NMI callback. */
 	crashing_cpu = safe_smp_processor_id();
 
@@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 		msecs--;
 	}
 
-	/* Leave the nmi callback set */
+	/*
+	 * Leave the nmi callback set, shootdown is a one-time thing.  Clearing
+	 * the callback could result in a NULL pointer dereference if a CPU
+	 * (finally) responds after the timeout expires.
+	 */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+	if (!crash_ipi_issued)
+		nmi_shootdown_cpus(NULL);
 }
 
 /*
@@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 	/* No other CPUs to shoot down */
 }
 
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
 void run_crash_ipi_callback(struct pt_regs *regs)
 {
 }

0 commit comments

Comments
 (0)