x86: Store a per-cpu shadow copy of CR4
Context switches and TLB flushes can change individual bits of CR4.
CR4 reads take several cycles, so store a shadow copy of CR4 in a
per-cpu variable.

To avoid wasting a cache line, I added the CR4 shadow to
cpu_tlbstate, which is already touched in switch_mm.  The heaviest
users of the cr4 shadow will be switch_mm and __switch_to_xtra, and
__switch_to_xtra is called shortly after switch_mm during context
switch, so the cacheline is likely to be hot.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Vince Weaver <vince@deater.net>
Cc: "hillf.zj" <hillf.zj@alibaba-inc.com>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/3a54dd3353fffbf84804398e00dfdc5b7c1afd7d.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
amluto authored and Ingo Molnar committed Feb 4, 2015
1 parent 375074c; commit 1e02ce4
Showing 20 changed files with 85 additions and 46 deletions.
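Before the per-file diff, here is a minimal standalone C sketch of the pattern this commit introduces — illustrative only, not kernel code: the plain globals below stand in for the real per-cpu cpu_tlbstate.cr4 field and for the hardware register, and the interrupt-disabled serialization the kernel relies on is omitted. The point is that reads hit cheap memory, and the register is written only when a bit actually changes.

#include <stdio.h>

/* Stand-ins for the real accessors; an actual CR4 read costs many cycles. */
static unsigned long hw_cr4;        /* models the CR4 register            */
static unsigned long cr4_shadow;    /* models this cpu's cpu_tlbstate.cr4 */

static unsigned long slow_read_cr4(void) { return hw_cr4; }
static void slow_write_cr4(unsigned long v) { hw_cr4 = v; }

/* Prime the shadow once, before anything consults it. */
static void init_shadow(void) { cr4_shadow = slow_read_cr4(); }

/* Read-modify-write against the cheap shadow; touch hardware only on change. */
static void set_bits(unsigned long mask)
{
	unsigned long cr4 = cr4_shadow;

	if ((cr4 | mask) != cr4) {
		cr4 |= mask;
		cr4_shadow = cr4;
		slow_write_cr4(cr4);
	}
}

static void clear_bits(unsigned long mask)
{
	unsigned long cr4 = cr4_shadow;

	if ((cr4 & ~mask) != cr4) {
		cr4 &= ~mask;
		cr4_shadow = cr4;
		slow_write_cr4(cr4);
	}
}

int main(void)
{
	hw_cr4 = 0x20;          /* pretend PAE was already set         */
	init_shadow();
	set_bits(0x80);         /* e.g. PGE: one real CR4 write        */
	set_bits(0x80);         /* redundant request: no write at all  */
	clear_bits(0x20);
	printf("shadow=%#lx hw=%#lx\n", cr4_shadow, hw_cr4);
	return 0;
}

The actual kernel change follows, file by file.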
6 changes: 3 additions & 3 deletions arch/x86/include/asm/paravirt.h
@@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x)
PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

static inline unsigned long read_cr4(void)
static inline unsigned long __read_cr4(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
static inline unsigned long read_cr4_safe(void)
static inline unsigned long __read_cr4_safe(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

static inline void write_cr4(unsigned long x)
static inline void __write_cr4(unsigned long x)
{
PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}
6 changes: 3 additions & 3 deletions arch/x86/include/asm/special_insns.h
@@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x)
native_write_cr3(x);
}

static inline unsigned long read_cr4(void)
static inline unsigned long __read_cr4(void)
{
return native_read_cr4();
}

static inline unsigned long read_cr4_safe(void)
static inline unsigned long __read_cr4_safe(void)
{
return native_read_cr4_safe();
}

static inline void write_cr4(unsigned long x)
static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
}
52 changes: 39 additions & 13 deletions arch/x86/include/asm/tlbflush.h
@@ -15,24 +15,56 @@
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif

struct tlb_state {
#ifdef CONFIG_SMP
struct mm_struct *active_mm;
int state;
#endif

/*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
unsigned long cr4;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}

/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
unsigned long cr4;

cr4 = read_cr4();
cr4 |= mask;
write_cr4(cr4);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 | mask) != cr4) {
cr4 |= mask;
this_cpu_write(cpu_tlbstate.cr4, cr4);
__write_cr4(cr4);
}
}

/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
unsigned long cr4;

cr4 = read_cr4();
cr4 &= ~mask;
write_cr4(cr4);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 & ~mask) != cr4) {
cr4 &= ~mask;
this_cpu_write(cpu_tlbstate.cr4, cr4);
__write_cr4(cr4);
}
}

/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
return this_cpu_read(cpu_tlbstate.cr4);
}

/*
@@ -61,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void)
{
unsigned long cr4;

cr4 = native_read_cr4();
cr4 = this_cpu_read(cpu_tlbstate.cr4);
/* clear PGE */
native_write_cr4(cr4 & ~X86_CR4_PGE);
/* write old PGE again and flush TLBs */
@@ -221,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2

struct tlb_state {
struct mm_struct *active_mm;
int state;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);

static inline void reset_lazy_tlbstate(void)
{
this_cpu_write(cpu_tlbstate.state, 0);
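With the helpers above, callers no longer need the raw register accessors for bit manipulation. A hedged sketch of a hypothetical caller follows — the function names and the choice of X86_CR4_TSD are illustrative and not part of this commit — assuming the caller honors the interrupts-disabled rule noted in the struct tlb_state comment:

#include <linux/irqflags.h>
#include <linux/types.h>
#include <asm/processor-flags.h>
#include <asm/tlbflush.h>

/* Hypothetical helper: forbid RDTSC from user mode on this CPU. */
static void example_disable_user_rdtsc(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* shadow and hardware CR4 must change together */
	cr4_set_bits(X86_CR4_TSD);	/* touches CR4 only if TSD was not already set */
	local_irq_restore(flags);
}

/* Querying CR4 state is now a cheap per-cpu load instead of a register read. */
static bool example_user_rdtsc_disabled(void)
{
	return cr4_read_shadow() & X86_CR4_TSD;
}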
2 changes: 1 addition & 1 deletion arch/x86/include/asm/virtext.h
@@ -46,7 +46,7 @@ static inline void cpu_vmxoff(void)

static inline int cpu_vmx_enabled(void)
{
return read_cr4() & X86_CR4_VMXE;
return __read_cr4() & X86_CR4_VMXE;
}

/** Disable VMX if it is enabled on the current CPU
2 changes: 1 addition & 1 deletion arch/x86/kernel/acpi/sleep.c
@@ -78,7 +78,7 @@ int x86_acpi_suspend_lowlevel(void)

header->pmode_cr0 = read_cr0();
if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
header->pmode_cr4 = read_cr4();
header->pmode_cr4 = __read_cr4();
header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
}
if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
7 changes: 7 additions & 0 deletions arch/x86/kernel/cpu/common.c
@@ -19,6 +19,7 @@
#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/debugreg.h>
#include <asm/sections.h>
#include <asm/vsyscall.h>
@@ -1293,6 +1294,12 @@ void cpu_init(void)

wait_for_master_cpu(cpu);

/*
* Initialize the CR4 shadow before doing anything that could
* try to read it.
*/
cr4_init_shadow();

/*
* Load microcode on this cpu if a valid microcode is available.
* This is early microcode loading procedure.
6 changes: 3 additions & 3 deletions arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -138,8 +138,8 @@ static void prepare_set(void)

/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
cr4 = read_cr4();
write_cr4(cr4 & ~X86_CR4_PGE);
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}

/*
@@ -171,7 +171,7 @@ static void post_set(void)

/* Restore value of CR4 */
if (cpu_has_pge)
write_cr4(cr4);
__write_cr4(cr4);
}

static void cyrix_set_arr(unsigned int reg, unsigned long base,
6 changes: 3 additions & 3 deletions arch/x86/kernel/cpu/mtrr/generic.c
@@ -678,8 +678,8 @@ static void prepare_set(void) __acquires(set_atomicity_lock)

/* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
cr4 = read_cr4();
write_cr4(cr4 & ~X86_CR4_PGE);
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}

/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
@@ -708,7 +708,7 @@ static void post_set(void) __releases(set_atomicity_lock)

/* Restore value of CR4 */
if (cpu_has_pge)
write_cr4(cr4);
__write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock);
}

1 change: 1 addition & 0 deletions arch/x86/kernel/head32.c
@@ -31,6 +31,7 @@ static void __init i386_default_early_setup(void)

asmlinkage __visible void __init i386_start_kernel(void)
{
cr4_init_shadow();
sanitize_boot_params(&boot_params);

/* Call the subarch specific early setup function */
2 changes: 2 additions & 0 deletions arch/x86/kernel/head64.c
@@ -155,6 +155,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

cr4_init_shadow();

/* Kill off the identity-map trampoline */
reset_early_page_tables();

2 changes: 1 addition & 1 deletion arch/x86/kernel/process_32.c
@@ -101,7 +101,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4_safe();
cr4 = __read_cr4_safe();
printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4);

2 changes: 1 addition & 1 deletion arch/x86/kernel/process_64.c
@@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, int all)
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4();
cr4 = __read_cr4();

printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
fs, fsindex, gs, gsindex, shadowgs);
2 changes: 1 addition & 1 deletion arch/x86/kernel/setup.c
@@ -1178,7 +1178,7 @@ void __init setup_arch(char **cmdline_p)

if (boot_cpu_data.cpuid_level >= 0) {
/* A CPU has %cr4 if and only if it has CPUID */
mmu_cr4_features = read_cr4();
mmu_cr4_features = __read_cr4();
if (trampoline_cr4_features)
*trampoline_cr4_features = mmu_cr4_features;
}
2 changes: 1 addition & 1 deletion arch/x86/kvm/svm.c
@@ -1583,7 +1583,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)

static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;

if (cr4 & X86_CR4_VMXE)
6 changes: 3 additions & 3 deletions arch/x86/kvm/vmx.c
@@ -2785,7 +2785,7 @@ static int hardware_enable(void)
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
u64 old, test_bits;

if (read_cr4() & X86_CR4_VMXE)
if (cr4_read_shadow() & X86_CR4_VMXE)
return -EBUSY;

INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
@@ -4255,7 +4255,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */

/* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = read_cr4();
cr4 = cr4_read_shadow();
vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
vmx->host_state.vmcs_host_cr4 = cr4;

@@ -7784,7 +7784,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);

cr4 = read_cr4();
cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
vmcs_writel(HOST_CR4, cr4);
vmx->host_state.vmcs_host_cr4 = cr4;
2 changes: 1 addition & 1 deletion arch/x86/mm/fault.c
@@ -600,7 +600,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
if (pte && pte_present(*pte) && pte_exec(*pte) &&
(pgd_flags(*pgd) & _PAGE_USER) &&
(read_cr4() & X86_CR4_SMEP))
(__read_cr4() & X86_CR4_SMEP))
printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
}

9 changes: 9 additions & 0 deletions arch/x86/mm/init.c
@@ -713,6 +713,15 @@ void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
#ifdef CONFIG_SMP
.active_mm = &init_mm,
.state = 0,
#endif
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
};
EXPORT_SYMBOL_GPL(cpu_tlbstate);

void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
/* entry 0 MUST be WB (hardwired to speed up translations) */
3 changes: 0 additions & 3 deletions arch/x86/mm/tlb.c
@@ -14,9 +14,6 @@
#include <asm/uv/uv.h>
#include <linux/debugfs.h>

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
= { &init_mm, 0, };

/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
11 changes: 4 additions & 7 deletions arch/x86/power/cpu.c
@@ -105,11 +105,8 @@ static void __save_processor_state(struct saved_context *ctxt)
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
ctxt->cr3 = read_cr3();
#ifdef CONFIG_X86_32
ctxt->cr4 = read_cr4_safe();
#else
/* CONFIG_X86_64 */
ctxt->cr4 = read_cr4();
ctxt->cr4 = __read_cr4_safe();
#ifdef CONFIG_X86_64
ctxt->cr8 = read_cr8();
#endif
ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
@@ -175,12 +172,12 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
/* cr4 was introduced in the Pentium CPU */
#ifdef CONFIG_X86_32
if (ctxt->cr4)
write_cr4(ctxt->cr4);
__write_cr4(ctxt->cr4);
#else
/* CONFIG X86_64 */
wrmsrl(MSR_EFER, ctxt->efer);
write_cr8(ctxt->cr8);
write_cr4(ctxt->cr4);
__write_cr4(ctxt->cr4);
#endif
write_cr3(ctxt->cr3);
write_cr2(ctxt->cr2);
2 changes: 1 addition & 1 deletion arch/x86/realmode/init.c
@@ -81,7 +81,7 @@ void __init setup_real_mode(void)

trampoline_header->start = (u64) secondary_startup_64;
trampoline_cr4_features = &trampoline_header->cr4;
*trampoline_cr4_features = read_cr4();
*trampoline_cr4_features = __read_cr4();

trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;