@@ -6,7 +6,11 @@
 */

#include <linux/context_tracking.h>
#include <linux/linkage.h>
#include <linux/lockdep.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/thread_info.h>

#include <asm/cpufeature.h>
@@ -15,7 +19,11 @@
#include <asm/exception.h>
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/sdei.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>

/*
 * This is intended to match the logic in irqentry_enter(), handling the kernel
@@ -67,7 +75,7 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
        }
}

void noinstr arm64_enter_nmi(struct pt_regs *regs)
static void noinstr arm64_enter_nmi(struct pt_regs *regs)
{
        regs->lockdep_hardirqs = lockdep_hardirqs_enabled();

@@ -80,7 +88,7 @@ void noinstr arm64_enter_nmi(struct pt_regs *regs)
        ftrace_nmi_enter();
}

void noinstr arm64_exit_nmi(struct pt_regs *regs)
static void noinstr arm64_exit_nmi(struct pt_regs *regs)
{
        bool restore = regs->lockdep_hardirqs;

@@ -97,22 +105,81 @@ void noinstr arm64_exit_nmi(struct pt_regs *regs)
        __nmi_exit();
}

asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
{
        if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
                arm64_enter_nmi(regs);
        else
                enter_from_kernel_mode(regs);
}

asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
{
        if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
                arm64_exit_nmi(regs);
        else
                exit_to_kernel_mode(regs);
}

static void __sched arm64_preempt_schedule_irq(void)
{
        lockdep_assert_irqs_disabled();

        /*
         * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
         * priority masking is used the GIC irqchip driver will clear DAIF.IF
         * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
         * DAIF we must have handled an NMI, so skip preemption.
         */
        if (system_uses_irq_prio_masking() && read_sysreg(daif))
                return;

        /*
         * Preempting a task from an IRQ means we leave copies of PSTATE
         * on the stack. cpufeature's enable calls may modify PSTATE, but
         * resuming one of these preempted tasks would undo those changes.
         *
         * Only allow a task to be preempted once cpufeatures have been
         * enabled.
         */
        if (system_capabilities_finalized())
                preempt_schedule_irq();
}

static void do_interrupt_handler(struct pt_regs *regs,
                void (*handler)(struct pt_regs *))
{
        if (on_thread_stack())
                call_on_irq_stack(regs, handler);
        else
                handler(regs);
}

extern void (*handle_arch_irq)(struct pt_regs *);
extern void (*handle_arch_fiq)(struct pt_regs *);

static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
                unsigned int esr)
{
        arm64_enter_nmi(regs);

        console_verbose();

        pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n",
                vector, smp_processor_id(), esr,
                esr_get_class_string(esr));

        __show_regs(regs);
        panic("Unhandled exception");
}

#define UNHANDLED(el, regsize, vector) \
asmlinkage void noinstr el##_##regsize##_##vector##_handler(struct pt_regs *regs) \
{ \
        const char *desc = #regsize "-bit " #el " " #vector; \
        __panic_unhandled(regs, desc, read_sysreg(esr_el1)); \
}

#ifdef CONFIG_ARM64_ERRATUM_1463225
static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);

@@ -162,6 +229,11 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
}
#endif /* CONFIG_ARM64_ERRATUM_1463225 */

UNHANDLED(el1t, 64, sync)
UNHANDLED(el1t, 64, irq)
UNHANDLED(el1t, 64, fiq)
UNHANDLED(el1t, 64, error)

static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
{
        unsigned long far = read_sysreg(far_el1);
@@ -193,15 +265,6 @@ static void noinstr el1_undef(struct pt_regs *regs)
        exit_to_kernel_mode(regs);
}

static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
{
        enter_from_kernel_mode(regs);
        local_daif_inherit(regs);
        bad_mode(regs, 0, esr);
        local_daif_mask();
        exit_to_kernel_mode(regs);
}

static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
{
        regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -245,7 +308,7 @@ static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
        exit_to_kernel_mode(regs);
}

asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
{
        unsigned long esr = read_sysreg(esr_el1);

@@ -275,10 +338,50 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
                el1_fpac(regs, esr);
                break;
        default:
                el1_inv(regs, esr);
                __panic_unhandled(regs, "64-bit el1h sync", esr);
        }
}

static void noinstr el1_interrupt(struct pt_regs *regs,
                void (*handler)(struct pt_regs *))
{
        write_sysreg(DAIF_PROCCTX_NOIRQ, daif);

        enter_el1_irq_or_nmi(regs);
        do_interrupt_handler(regs, handler);

        /*
         * Note: thread_info::preempt_count includes both thread_info::count
         * and thread_info::need_resched, and is not equivalent to
         * preempt_count().
         */
        if (IS_ENABLED(CONFIG_PREEMPTION) &&
            READ_ONCE(current_thread_info()->preempt_count) == 0)
                arm64_preempt_schedule_irq();

        exit_el1_irq_or_nmi(regs);
}

asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs)
{
        el1_interrupt(regs, handle_arch_irq);
}

asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
{
        el1_interrupt(regs, handle_arch_fiq);
}

asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
{
        unsigned long esr = read_sysreg(esr_el1);

        local_daif_restore(DAIF_ERRCTX);
        arm64_enter_nmi(regs);
        do_serror(regs, esr);
        arm64_exit_nmi(regs);
}

asmlinkage void noinstr enter_from_user_mode(void)
{
        lockdep_hardirqs_off(CALLER_ADDR0);
@@ -398,7 +501,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)

        enter_from_user_mode();
        do_debug_exception(far, esr, regs);
        local_daif_restore(DAIF_PROCCTX_NOIRQ);
        local_daif_restore(DAIF_PROCCTX);
}

static void noinstr el0_svc(struct pt_regs *regs)
@@ -415,7 +518,7 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
        do_ptrauth_fault(regs, esr);
}

asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
{
        unsigned long esr = read_sysreg(esr_el1);

@@ -468,6 +571,56 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
        }
}

static void noinstr el0_interrupt(struct pt_regs *regs,
                void (*handler)(struct pt_regs *))
{
        enter_from_user_mode();

        write_sysreg(DAIF_PROCCTX_NOIRQ, daif);

        if (regs->pc & BIT(55))
                arm64_apply_bp_hardening();

        do_interrupt_handler(regs, handler);
}

static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
{
        el0_interrupt(regs, handle_arch_irq);
}

asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs)
{
        __el0_irq_handler_common(regs);
}

static void noinstr __el0_fiq_handler_common(struct pt_regs *regs)
{
        el0_interrupt(regs, handle_arch_fiq);
}

asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
{
        __el0_fiq_handler_common(regs);
}

static void __el0_error_handler_common(struct pt_regs *regs)
{
        unsigned long esr = read_sysreg(esr_el1);

        enter_from_user_mode();
        local_daif_restore(DAIF_ERRCTX);
        arm64_enter_nmi(regs);
        do_serror(regs, esr);
        arm64_exit_nmi(regs);
        local_daif_restore(DAIF_PROCCTX);
}

asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
{
        __el0_error_handler_common(regs);
}

#ifdef CONFIG_COMPAT
static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
{
@@ -483,7 +636,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
        do_el0_svc_compat(regs);
}

asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
{
        unsigned long esr = read_sysreg(esr_el1);

@@ -526,4 +679,71 @@ asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
                el0_inv(regs, esr);
        }
}

asmlinkage void noinstr el0t_32_irq_handler(struct pt_regs *regs)
{
        __el0_irq_handler_common(regs);
}

asmlinkage void noinstr el0t_32_fiq_handler(struct pt_regs *regs)
{
        __el0_fiq_handler_common(regs);
}

asmlinkage void noinstr el0t_32_error_handler(struct pt_regs *regs)
{
        __el0_error_handler_common(regs);
}
#else /* CONFIG_COMPAT */
UNHANDLED(el0t, 32, sync)
UNHANDLED(el0t, 32, irq)
UNHANDLED(el0t, 32, fiq)
UNHANDLED(el0t, 32, error)
#endif /* CONFIG_COMPAT */

#ifdef CONFIG_VMAP_STACK
asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
{
        unsigned int esr = read_sysreg(esr_el1);
        unsigned long far = read_sysreg(far_el1);

        arm64_enter_nmi(regs);
        panic_bad_stack(regs, esr, far);
}
#endif /* CONFIG_VMAP_STACK */

#ifdef CONFIG_ARM_SDE_INTERFACE
asmlinkage noinstr unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
        unsigned long ret;

        /*
         * We didn't take an exception to get here, so the HW hasn't
         * set/cleared bits in PSTATE that we may rely on.
         *
         * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
         * whether PSTATE bits are inherited unchanged or generated from
         * scratch, and the TF-A implementation always clears PAN and always
         * clears UAO. There are no other known implementations.
         *
         * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
         * PSTATE is modified upon architectural exceptions, and so PAN is
         * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
         * cleared.
         *
         * We must explicitly reset PAN to the expected state, including
         * clearing it when the host isn't using it, in case a VM had it set.
         */
        if (system_uses_hw_pan())
                set_pstate_pan(1);
        else if (cpu_has_pan())
                set_pstate_pan(0);

        arm64_enter_nmi(regs);
        ret = do_sdei_event(regs, arg);
        arm64_exit_nmi(regs);

        return ret;
}
#endif /* CONFIG_ARM_SDE_INTERFACE */

@@ -0,0 +1,46 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Low-level idle sequences
 */

#include <linux/cpu.h>
#include <linux/irqflags.h>

#include <asm/barrier.h>
#include <asm/cpuidle.h>
#include <asm/cpufeature.h>
#include <asm/sysreg.h>

/*
 * cpu_do_idle()
 *
 * Idle the processor (wait for interrupt).
 *
 * If the CPU supports priority masking we must do additional work to
 * ensure that interrupts are not masked at the PMR (because the core will
 * not wake up if we block the wake up signal in the interrupt controller).
 */
void noinstr cpu_do_idle(void)
{
        struct arm_cpuidle_irq_context context;

        arm_cpuidle_save_irq_context(&context);

        dsb(sy);
        wfi();

        arm_cpuidle_restore_irq_context(&context);
}

/*
 * This is our default idle handler.
 */
void noinstr arch_cpu_idle(void)
{
        /*
         * This should do all the clock switching and wait for interrupt
         * tricks
         */
        cpu_do_idle();
        raw_local_irq_enable();
}
@@ -18,7 +18,6 @@
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/nospec.h>
@@ -48,7 +47,6 @@
#include <asm/alternative.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cpuidle.h>
#include <asm/cacheflush.h>
#include <asm/exec.h>
#include <asm/fpsimd.h>
@@ -74,40 +72,6 @@ EXPORT_SYMBOL_GPL(pm_power_off);

void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);

/*
 * cpu_do_idle()
 *
 * Idle the processor (wait for interrupt).
 *
 * If the CPU supports priority masking we must do additional work to
 * ensure that interrupts are not masked at the PMR (because the core will
 * not wake up if we block the wake up signal in the interrupt controller).
 */
void noinstr cpu_do_idle(void)
{
        struct arm_cpuidle_irq_context context;

        arm_cpuidle_save_irq_context(&context);

        dsb(sy);
        wfi();

        arm_cpuidle_restore_irq_context(&context);
}

/*
 * This is our default idle handler.
 */
void noinstr arch_cpu_idle(void)
{
        /*
         * This should do all the clock switching and wait for interrupt
         * tricks
         */
        cpu_do_idle();
        raw_local_irq_enable();
}

#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
@@ -723,22 +687,6 @@ static int __init tagged_addr_init(void)
core_initcall(tagged_addr_init);
#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */

asmlinkage void __sched arm64_preempt_schedule_irq(void)
{
        lockdep_assert_irqs_disabled();

        /*
         * Preempting a task from an IRQ means we leave copies of PSTATE
         * on the stack. cpufeature's enable calls may modify PSTATE, but
         * resuming one of these preempted tasks would undo those changes.
         *
         * Only allow a task to be preempted once cpufeatures have been
         * enabled.
         */
        if (system_capabilities_finalized())
                preempt_schedule_irq();
}

#ifdef CONFIG_BINFMT_ELF
int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
                bool has_interp, bool is_interp)
@@ -233,13 +233,13 @@ unsigned long sdei_arch_get_entry_point(int conduit)
}

/*
 * __sdei_handler() returns one of:
 * do_sdei_event() returns one of:
 * SDEI_EV_HANDLED - success, return to the interrupted context.
 * SDEI_EV_FAILED - failure, return this error code to firmware.
 * virtual-address - success, return to this address.
 */
static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
                struct sdei_registered_event *arg)
unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
                struct sdei_registered_event *arg)
{
        u32 mode;
        int i, err = 0;
@@ -294,45 +294,3 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,

        return vbar + 0x480;
}

static void __kprobes notrace __sdei_pstate_entry(void)
{
        /*
         * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
         * whether PSTATE bits are inherited unchanged or generated from
         * scratch, and the TF-A implementation always clears PAN and always
         * clears UAO. There are no other known implementations.
         *
         * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
         * PSTATE is modified upon architectural exceptions, and so PAN is
         * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
         * cleared.
         *
         * We must explicitly reset PAN to the expected state, including
         * clearing it when the host isn't using it, in case a VM had it set.
         */
        if (system_uses_hw_pan())
                set_pstate_pan(1);
        else if (cpu_has_pan())
                set_pstate_pan(0);
}

asmlinkage noinstr unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
        unsigned long ret;

        /*
         * We didn't take an exception to get here, so the HW hasn't
         * set/cleared bits in PSTATE that we may rely on. Initialize PAN.
         */
        __sdei_pstate_entry();

        arm64_enter_nmi(regs);

        ret = _sdei_handler(regs, arg);

        arm64_exit_nmi(regs);

        return ret;
}
@@ -45,13 +45,6 @@
#include <asm/system_misc.h>
#include <asm/sysreg.h>

static const char *handler[] = {
"Synchronous Abort",
"IRQ",
"FIQ",
"Error"
};

int show_unhandled_signals = 0;

static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
@@ -750,28 +743,9 @@ const char *esr_get_class_string(u32 esr)
        return esr_class_str[ESR_ELx_EC(esr)];
}

/*
 * bad_mode handles the impossible case in the exception vector. This is always
 * fatal.
 */
asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
{
        arm64_enter_nmi(regs);

        console_verbose();

        pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
                handler[reason], smp_processor_id(), esr,
                esr_get_class_string(esr));

        __show_regs(regs);
        local_daif_mask();
        panic("bad mode");
}

/*
 * bad_el0_sync handles unexpected, but potentially recoverable synchronous
 * exceptions taken from EL0. Unlike bad_mode, this returns.
 * exceptions taken from EL0.
 */
void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
{
@@ -789,15 +763,11 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
        __aligned(16);

asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
{
        unsigned long tsk_stk = (unsigned long)current->stack;
        unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
        unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
        unsigned int esr = read_sysreg(esr_el1);
        unsigned long far = read_sysreg(far_el1);

        arm64_enter_nmi(regs);

        console_verbose();
        pr_emerg("Insufficient stack space to handle exception!");
@@ -870,15 +840,11 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
        }
}

asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr)
void do_serror(struct pt_regs *regs, unsigned int esr)
{
        arm64_enter_nmi(regs);

        /* non-RAS errors are not containable */
        if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
                arm64_serror_panic(regs, esr);

        arm64_exit_nmi(regs);
}

/* GENERIC_BUG traps */
@@ -836,13 +836,6 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(do_mem_abort);

void do_el0_irq_bp_hardening(void)
{
        /* PC has already been checked in entry.S */
        arm64_apply_bp_hardening();
}
NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);

void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,