Skip to content

Commit

Permalink
OS-6546 Use PCID if KPTI is enabled
Browse files Browse the repository at this point in the history
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Alex Wilson <alex.wilson@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Alex Wilson <alex.wilson@joyent.com>
  • Loading branch information
John Levon committed Mar 13, 2018
1 parent 0e957fc commit 60f89b4
Show file tree
Hide file tree
Showing 34 changed files with 761 additions and 439 deletions.
10 changes: 5 additions & 5 deletions usr/src/cmd/mdb/i86pc/modules/unix/unix.c
Expand Up @@ -1010,18 +1010,18 @@ crregs_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
cr2 = kmdb_unix_getcr2();
cr3 = kmdb_unix_getcr3();
cr4 = kmdb_unix_getcr4();
mdb_printf("%%cr0 = 0x%08x <%b>\n", cr0, cr0, cr0_flag_bits);
mdb_printf("%%cr2 = 0x%08x <%a>\n", cr2, cr2);
mdb_printf("%%cr0 = 0x%lx <%b>\n", cr0, cr0, cr0_flag_bits);
mdb_printf("%%cr2 = 0x%lx <%a>\n", cr2, cr2);

if ((cr4 & CR4_PCIDE)) {
mdb_printf("%%cr3 = 0x%08x <pfn:0x%lx pcid:%u>\n",
mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx pcid:%lu>\n", cr3,
cr3 >> MMU_PAGESHIFT, cr3 & MMU_PAGEOFFSET);
} else {
mdb_printf("%%cr3 = 0x%08x <pfn:0x%lx flags:%b>\n", cr3,
mdb_printf("%%cr3 = 0x%lx <pfn:0x%lx flags:%b>\n", cr3,
cr3 >> MMU_PAGESHIFT, cr3, cr3_flag_bits);
}

mdb_printf("%%cr4 = 0x%08x <%b>\n", cr4, cr4, cr4_flag_bits);
mdb_printf("%%cr4 = 0x%lx <%b>\n", cr4, cr4, cr4_flag_bits);

return (DCMD_OK);
}
Expand Down
5 changes: 4 additions & 1 deletion usr/src/cmd/mdb/intel/kmdb/kaif.c
Expand Up @@ -265,13 +265,16 @@ kaif_set_register(const char *regname, kreg_t val)
static boolean_t
kaif_toxic_text(uintptr_t addr)
{
static GElf_Sym toxic_syms[1] = { 0, };
static GElf_Sym toxic_syms[2] = { 0, };
size_t i;

if (toxic_syms[0].st_name == NULL) {
if (mdb_tgt_lookup_by_name(mdb.m_target, MDB_TGT_OBJ_EXEC,
"tr_iret_user", &toxic_syms[0], NULL) != 0)
warn("couldn't find tr_iret_user\n");
if (mdb_tgt_lookup_by_name(mdb.m_target, MDB_TGT_OBJ_EXEC,
"tr_mmu_flush_user_range", &toxic_syms[1], NULL) != 0)
warn("couldn't find tr_mmu_flush_user_range\n");
}

for (i = 0; i < ARRAY_SIZE(toxic_syms); i++) {
Expand Down
6 changes: 4 additions & 2 deletions usr/src/uts/i86pc/io/gfx_private/gfxp_vm.c
Expand Up @@ -22,6 +22,8 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright 2018 Joyent, Inc.
*/

#include <sys/debug.h>
Expand Down Expand Up @@ -99,7 +101,7 @@ gfxp_map_kernel_space(uint64_t start, size_t size, uint32_t mode)
* The hypervisor doesn't allow r/w mappings to some pages, such as
* page tables, gdt, etc. Detect %cr3 to notify users of this interface.
*/
if (start == mmu_ptob(mmu_btop(getcr3())))
if (start == mmu_ptob(mmu_btop(getcr3_pa())))
return (0);
#endif

Expand Down Expand Up @@ -318,7 +320,7 @@ gfxp_load_kernel_space(uint64_t start, size_t size,
* The hypervisor doesn't allow r/w mappings to some pages, such as
* page tables, gdt, etc. Detect %cr3 to notify users of this interface.
*/
if (start == mmu_ptob(mmu_btop(getcr3())))
if (start == mmu_ptob(mmu_btop(getcr3_pa())))
return;
#endif

Expand Down
5 changes: 5 additions & 0 deletions usr/src/uts/i86pc/ml/fb_swtch_src.s
Expand Up @@ -22,6 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright 2018 Joyent, Inc.
*/


Expand Down Expand Up @@ -52,6 +53,9 @@ int fb_swtch_silence_lint = 0;


/*
* DISABLE_PAGING: clear the PCIDE bit (bit 17) of %cr4, then clear the PG
* bit (bit 31) of %cr0. %eax is used as the scratch register for both
* read-modify-write sequences and is clobbered.
*
* NOTE(review): PCIDE is cleared before PG, presumably because paging
* cannot be turned off while PCIDE is still set -- confirm against the
* Intel SDM before reordering these instructions.
*/
#define DISABLE_PAGING \
movl %cr4, %eax ;\
btrl $17, %eax /* clear PCIDE bit */ ;\
movl %eax, %cr4 ;\
movl %cr0, %eax ;\
btrl $31, %eax /* clear PG bit */ ;\
movl %eax, %cr0
Expand Down Expand Up @@ -222,6 +226,7 @@ _start:
* Disable long mode by:
* - shutting down paging (bit 31 of cr0). This will flush the
* TLBs.
* - turning off PCID in cr4
* - disabling LME (long mode enable) in EFER (extended feature reg)
*/
#endif
Expand Down
32 changes: 32 additions & 0 deletions usr/src/uts/i86pc/ml/kpti_trampolines.s
Expand Up @@ -92,6 +92,9 @@
* We do not do any stack pivoting for syscalls (and we leave SYSENTER's
* existing %rsp pivot untouched) -- instead we spill registers into
* %gs:CPU_KPTI_* as we need to.
*
* Note that the normal %cr3 values do not cause invalidations with PCIDE - see
* hat_switch().
*/

/*
Expand Down Expand Up @@ -705,6 +708,35 @@ tr_intr_ret_end:
MKIVCT(248); MKIVCT(249); MKIVCT(250); MKIVCT(251);
MKIVCT(252); MKIVCT(253); MKIVCT(254); MKIVCT(255);

/*
* We're PCIDE, but we don't have INVPCID. The only way to invalidate a
* PCID other than the current one, then, is to load its cr3 then
* invlpg. But loading kf_user_cr3 means we can no longer access our
* caller's text mapping (or indeed, its stack). So this little helper
* has to live within our trampoline text region.
*
* Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3)
*
* Registers as consumed by the code below:
*   %rdi  addr - first address passed to invlpg
*   %rsi  len  - added to addr to form the (exclusive) end address
*   %rdx  pgsz - stride added to the address after each invlpg
*   %rcx  cr3  - value loaded into %cr3 while the loop runs
*
* The loop condition is tested at the bottom, so one invlpg is always
* issued, even when len is 0. %rax is clobbered; %rbx is saved and restored.
* The push/pop of %rbx bracket the %cr3 switch, so the stack is only touched
* while the original %cr3 is loaded.
*/
ENTRY_NP(tr_mmu_flush_user_range)
push %rbx
/* When we read cr3, it never has the NOINVL bit set. */
mov %cr3, %rax
movq $CR3_NOINVL_BIT, %rbx
/*
* %rax = current cr3 with NOINVL or'd in; this is the value written back
* after the loop. NOTE(review): presumably so that the restore does not
* itself invalidate anything -- confirm against hat_switch().
*/
orq %rbx, %rax

/* Switch to the caller-supplied cr3 and compute end = addr + len. */
mov %rcx, %cr3
add %rdi, %rsi
/* Align the head of the invalidation loop. */
.align ASM_ENTRY_ALIGN
1:
invlpg (%rdi)
add %rdx, %rdi
/* Unsigned compare: keep going while addr < end. */
cmp %rsi, %rdi
jb 1b
/* Back to the cr3 we entered with (plus NOINVL). */
mov %rax, %cr3
pop %rbx
retq
SET_SIZE(tr_mmu_flush_user_range)

.align MMU_PAGESIZE
.global kpti_tramp_end
kpti_tramp_end:
Expand Down
6 changes: 4 additions & 2 deletions usr/src/uts/i86pc/ml/mpcore.s
Expand Up @@ -24,6 +24,8 @@
/*
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
*
* Copyright 2018 Joyent, Inc.
*/

#include <sys/asm_linkage.h>
Expand Down Expand Up @@ -326,7 +328,7 @@ kernel_cs_code:
* Complete the rest of the setup and call mp_startup().
*/
movq %gs:CPU_THREAD, %rax /* get thread ptr */
call *T_PC(%rax) /* call mp_startup */
call *T_PC(%rax) /* call mp_startup_boot */
/* not reached */
int $20 /* whoops, returned somehow! */

Expand Down Expand Up @@ -502,7 +504,7 @@ kernel_cs_code:

/*
* Before going any farther, enable usage of page table NX bit if
* that's how our page tables are set up.
* that's how our page tables are set up. (PCIDE is enabled later on).
*/
bt $X86FSET_NX, x86_featureset
jnc 1f
Expand Down
49 changes: 47 additions & 2 deletions usr/src/uts/i86pc/os/cpuid.c
Expand Up @@ -32,7 +32,7 @@
* Portions Copyright 2009 Advanced Micro Devices, Inc.
*/
/*
* Copyright 2017 Joyent, Inc.
* Copyright 2018 Joyent, Inc.
*/
/*
* Various routines to handle identification
Expand All @@ -58,6 +58,7 @@
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/comm_page.h>
#include <sys/mach_mmu.h>
#include <sys/tsc.h>

#ifdef __xpv
Expand All @@ -83,7 +84,7 @@
* x86_vendor accordingly.
* o Processing the feature flags returned by the cpuid instruction while
* applying any workarounds or tricks for the specific processor.
* o Mapping the feature flags into Solaris feature bits (X86_*).
* o Mapping the feature flags into illumos feature bits (X86_*).
* o Processing extended feature flags if supported by the processor,
* again while applying specific processor knowledge.
* o Determining the CMT characteristics of the system.
Expand Down Expand Up @@ -122,6 +123,14 @@ uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;
uint_t x86_clflush_size = 0;

/*
* Whether PCID and INVPCID are to be used. A value of -1 means "not yet
* decided": enable_pcid() replaces it with the result of the matching
* x86_featureset check. Any other value is left alone by enable_pcid(), so
* setting 0 beforehand (as mlsetup() does for the "pcid" boot property, or
* when KPTI is off) keeps the feature disabled. Under __xpv both start at 0.
*/
#if defined(__xpv)
int x86_use_pcid = 0;
int x86_use_invpcid = 0;
#else
int x86_use_pcid = -1;
int x86_use_invpcid = -1;
#endif

uint_t pentiumpro_bug4046376;

uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
Expand Down Expand Up @@ -196,6 +205,8 @@ static char *x86_feature_names[NUM_X86_FEATURES] = {
"umip",
"pku",
"ospke",
"pcid",
"invpcid",
};

boolean_t
Expand Down Expand Up @@ -1302,6 +1313,10 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_SMEP)
add_x86_feature(featureset, X86FSET_SMEP);

if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_INVPCID) {
add_x86_feature(featureset, X86FSET_INVPCID);
}

/*
* We check disable_smap here in addition to in startup_smap()
* to ensure CPUs that aren't the boot CPU don't accidentally
Expand Down Expand Up @@ -1504,6 +1519,13 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
}
}
}

if (cpi->cpi_vendor == X86_VENDOR_Intel) {
if (cp->cp_ecx & CPUID_INTC_ECX_PCID) {
add_x86_feature(featureset, X86FSET_PCID);
}
}

if (cp->cp_ecx & CPUID_INTC_ECX_X2APIC) {
add_x86_feature(featureset, X86FSET_X2APIC);
}
Expand Down Expand Up @@ -5003,6 +5025,29 @@ post_startup_cpu_fixups(void)
#endif /* !__xpv */
}

/*
 * Settle the x86_use_pcid and x86_use_invpcid settings and, if PCID is to
 * be used, set CR4_PCIDE in %cr4.
 *
 * Each setting holds -1 until a decision has been made; in that case it is
 * filled in from the corresponding bit of x86_featureset. Any other value
 * already present is kept as-is.
 */
void
enable_pcid(void)
{
	if (x86_use_invpcid == -1) {
		x86_use_invpcid = is_x86_feature(x86_featureset,
		    X86FSET_INVPCID);
	}

	if (x86_use_pcid == -1) {
		x86_use_pcid = is_x86_feature(x86_featureset, X86FSET_PCID);
	}

	if (x86_use_pcid != 0) {
		/*
		 * According to Intel, the PCID bits of %cr3 are honoured as
		 * soon as PCIDE is set, so they must already be clear here.
		 */
		ASSERT((getcr3() & MMU_PAGEOFFSET) == PCID_NONE);

		setcr4(getcr4() | CR4_PCIDE);
	}
}

/*
* Setup necessary registers to enable XSAVE feature on this processor.
* This function needs to be called early enough, so that no xsave/xrstor
Expand Down
8 changes: 7 additions & 1 deletion usr/src/uts/i86pc/os/fakebop.c
Expand Up @@ -26,7 +26,7 @@
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
*
* Copyright 2013 Joyent, Inc. All rights reserved.
* Copyright 2018 Joyent, Inc. All rights reserved.
*/

/*
Expand Down Expand Up @@ -846,6 +846,12 @@ do_bsys_doint(bootops_t *bop, int intnum, struct bop_regs *rp)
bios_func_t bios_func = (bios_func_t)(void *)(uintptr_t)0x5000;
bios_regs_t br;

/*
* We're about to disable paging; we shouldn't be PCID enabled.
*/
if (getcr4() & CR4_PCIDE)
prom_panic("do_bsys_doint() with PCID enabled\n");

/*
* The first time we do this, we have to copy the pre-packaged
* low memory bios call code image into place.
Expand Down
10 changes: 2 additions & 8 deletions usr/src/uts/i86pc/os/mach_kdi.c
Expand Up @@ -21,10 +21,10 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Copyright 2018 Joyent, Inc.
*/

#pragma ident "%Z%%M% %I% %E% SMI"

/*
* Kernel/Debugger Interface (KDI) routines. Called during debugger under
* various system states (boot, while running, while the debugger has control).
Expand Down Expand Up @@ -113,12 +113,6 @@ kdi_dreg_set(int reg, ulong_t value)
}
}

void
kdi_flush_caches(void)
{
reload_cr3();
}

extern void kdi_slave_entry(void);

void
Expand Down
15 changes: 11 additions & 4 deletions usr/src/uts/i86pc/os/mlsetup.c
Expand Up @@ -148,17 +148,24 @@ mlsetup(struct regs *rp)
else
cpuid_feature_edx_exclude = (uint32_t)prop_value;

#if defined(__amd64) && !defined(__xpv)
#if !defined(__xpv)
/*
* Check to see if KPTI has been explicitly enabled or disabled.
* We have to check this before init_desctbls().
*/
if (bootprop_getval("kpti", &prop_value) != 0) {
kpti_enable = 1;
} else {
if (bootprop_getval("kpti", &prop_value) == 0) {
kpti_enable = (uint64_t)(prop_value == 1);
prom_printf("unix: forcing kpti to %s due to boot argument\n",
(kpti_enable == 1) ? "ON" : "OFF");
} else {
kpti_enable = 1;
}

if (bootprop_getval("pcid", &prop_value) == 0 && prop_value == 0) {
prom_printf("unix: forcing pcid to OFF due to boot argument\n");
x86_use_pcid = 0;
} else if (kpti_enable != 1) {
x86_use_pcid = 0;
}
#endif

Expand Down
19 changes: 8 additions & 11 deletions usr/src/uts/i86pc/os/mp_pc.c
Expand Up @@ -133,10 +133,11 @@ rmp_gdt_init(rm_platter_t *rm)

#if defined(__amd64)
/* Use the kas address space for the CPU startup thread. */
if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
if (mmu_ptob(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL) {
panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
"located above 4G in physical memory (@ 0x%lx)",
MAKECR3(kas.a_hat->hat_htable->ht_pfn));
mmu_ptob(kas.a_hat->hat_htable->ht_pfn));
}

/*
* Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
Expand Down Expand Up @@ -356,21 +357,17 @@ mach_cpucontext_xalloc(struct cpu *cp, int optype)

/*
* CPU needs to access kernel address space after powering on.
* When hot-adding CPU at runtime, directly use top level page table
* of kas other than the return value of getcr3(). getcr3() returns
* current process's top level page table, which may be different from
* the one of kas.
*/
rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn, PCID_NONE);
rm->rm_cpu = cp->cpu_id;

/*
* For hot-adding CPU at runtime, Machine Check and Performance Counter
* should be disabled. They will be enabled on demand after CPU powers
* on successfully
* We need to mask off any bits set on our boot CPU that can't apply
* while the subject CPU is initializing. If appropriate, they are
* enabled later on.
*/
rm->rm_cr4 = getcr4();
rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);
rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE | CR4_PCIDE);

rmp_gdt_init(rm);

Expand Down
2 changes: 2 additions & 0 deletions usr/src/uts/i86pc/os/mp_startup.c
Expand Up @@ -1796,6 +1796,8 @@ mp_startup_common(boolean_t boot)
*/
cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);

enable_pcid();

/*
* Setup this processor for XSAVE.
*/
Expand Down

0 comments on commit 60f89b4

Please sign in to comment.