Skip to content

Commit

Permalink
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/l…
Browse files Browse the repository at this point in the history
…inux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "A set of x86 fixes:

   - Cure the LDT remapping to user space on 5 level paging which ended
     up in the KASLR space

   - Remove LDT mapping before freeing the LDT pages

   - Make NFIT MCE handling more robust

   - Unbreak the VSMP build by removing the dependency on paravirt ops

   - Support broken PIT emulation on Microsoft hyperV

   - Don't trace vmware_sched_clock() to avoid tracer recursion

   - Remove -pipe from KBUILD CFLAGS which breaks clang and is also
     slower on GCC

   - Trivial coding style and typo fixes"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/cpu/vmware: Do not trace vmware_sched_clock()
  x86/vsmp: Remove dependency on pv_irq_ops
  x86/ldt: Remove unused variable in map_ldt_struct()
  x86/ldt: Unmap PTEs for the slot before freeing LDT pages
  x86/mm: Move LDT remap out of KASLR region on 5-level paging
  acpi/nfit, x86/mce: Validate a MCE's address before using it
  acpi/nfit, x86/mce: Handle only uncorrectable machine checks
  x86/build: Remove -pipe from KBUILD_CFLAGS
  x86/hyper-v: Fix indentation in hv_do_fast_hypercall16()
  Documentation/x86: Fix typo in zero-page.txt
  x86/hyper-v: Enable PIT shutdown quirk
  clockevents/drivers/i8253: Add support for PIT shutdown quirk
  • Loading branch information
torvalds committed Nov 11, 2018
2 parents 655c6b9 + 1503538 commit b6df7b6
Show file tree
Hide file tree
Showing 17 changed files with 114 additions and 138 deletions.
34 changes: 18 additions & 16 deletions Documentation/x86/x86_64/mm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,24 @@ __________________|____________|__________________|_________|___________________
____________________________________________________________|___________________________________________________________
| | | |
ffff800000000000 | -128 TB | ffff87ffffffffff | 8 TB | ... guard hole, also reserved for hypervisor
ffff880000000000 | -120 TB | ffffc7ffffffffff | 64 TB | direct mapping of all physical memory (page_offset_base)
ffffc80000000000 | -56 TB | ffffc8ffffffffff | 1 TB | ... unused hole
ffff880000000000 | -120 TB | ffff887fffffffff | 0.5 TB | LDT remap for PTI
ffff888000000000 | -119.5 TB | ffffc87fffffffff | 64 TB | direct mapping of all physical memory (page_offset_base)
ffffc88000000000 | -55.5 TB | ffffc8ffffffffff | 0.5 TB | ... unused hole
ffffc90000000000 | -55 TB | ffffe8ffffffffff | 32 TB | vmalloc/ioremap space (vmalloc_base)
ffffe90000000000 | -23 TB | ffffe9ffffffffff | 1 TB | ... unused hole
ffffea0000000000 | -22 TB | ffffeaffffffffff | 1 TB | virtual memory map (vmemmap_base)
ffffeb0000000000 | -21 TB | ffffebffffffffff | 1 TB | ... unused hole
ffffec0000000000 | -20 TB | fffffbffffffffff | 16 TB | KASAN shadow memory
fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
| | | | vaddr_end for KASLR
fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | LDT remap for PTI
ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
__________________|____________|__________________|_________|____________________________________________________________
|
| Identical layout to the 47-bit one from here on:
| Identical layout to the 56-bit one from here on:
____________________________________________________________|____________________________________________________________
| | | |
fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
| | | | vaddr_end for KASLR
fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
Expand Down Expand Up @@ -83,31 +84,32 @@ Notes:
__________________|____________|__________________|_________|___________________________________________________________
| | | |
0000800000000000 | +64 PB | ffff7fffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical
| | | | virtual memory addresses up to the -128 TB
| | | | virtual memory addresses up to the -64 PB
| | | | starting offset of kernel mappings.
__________________|____________|__________________|_________|___________________________________________________________
|
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
ff00000000000000 | -64 PB | ff0fffffffffffff | 4 PB | ... guard hole, also reserved for hypervisor
ff10000000000000 | -60 PB | ff8fffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base)
ff90000000000000 | -28 PB | ff9fffffffffffff | 4 PB | LDT remap for PTI
ff10000000000000 | -60 PB | ff10ffffffffffff | 0.25 PB | LDT remap for PTI
ff11000000000000 | -59.75 PB | ff90ffffffffffff | 32 PB | direct mapping of all physical memory (page_offset_base)
ff91000000000000 | -27.75 PB | ff9fffffffffffff | 3.75 PB | ... unused hole
ffa0000000000000 | -24 PB | ffd1ffffffffffff | 12.5 PB | vmalloc/ioremap space (vmalloc_base)
ffd2000000000000 | -11.5 PB | ffd3ffffffffffff | 0.5 PB | ... unused hole
ffd4000000000000 | -11 PB | ffd5ffffffffffff | 0.5 PB | virtual memory map (vmemmap_base)
ffd6000000000000 | -10.5 PB | ffdeffffffffffff | 2.25 PB | ... unused hole
ffdf000000000000 | -8.25 PB | fffffdffffffffff | ~8 PB | KASAN shadow memory
fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
| | | | vaddr_end for KASLR
fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
__________________|____________|__________________|_________|____________________________________________________________
|
| Identical layout to the 47-bit one from here on:
____________________________________________________________|____________________________________________________________
| | | |
fffffc0000000000 | -4 TB | fffffdffffffffff | 2 TB | ... unused hole
| | | | vaddr_end for KASLR
fffffe0000000000 | -2 TB | fffffe7fffffffff | 0.5 TB | cpu_entry_area mapping
fffffe8000000000 | -1.5 TB | fffffeffffffffff | 0.5 TB | ... unused hole
ffffff0000000000 | -1 TB | ffffff7fffffffff | 0.5 TB | %esp fixup stacks
ffffff8000000000 | -512 GB | ffffffeeffffffff | 444 GB | ... unused hole
ffffffef00000000 | -68 GB | fffffffeffffffff | 64 GB | EFI region mapping space
ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | ... unused hole
Expand Down
2 changes: 1 addition & 1 deletion Documentation/x86/zero-page.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Offset Proto Name Meaning
0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits
140/080 ALL edid_info Video mode setup (struct edid_info)
1C0/020 ALL efi_info EFI 32 information (struct efi_info)
1E0/004 ALL alk_mem_k Alternative mem check, in KB
1E0/004 ALL alt_mem_k Alternative mem check, in KB
1E4/004 ALL scratch Scratch field for the kernel setup code
1E8/001 ALL e820_entries Number of entries in e820_table (below)
1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below)
Expand Down
1 change: 0 additions & 1 deletion arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,6 @@ config X86_VSMP
bool "ScaleMP vSMP"
select HYPERVISOR_GUEST
select PARAVIRT
select PARAVIRT_XXL
depends on X86_64 && PCI
depends on X86_EXTENDED_PLATFORM
depends on SMP
Expand Down
4 changes: 1 addition & 3 deletions arch/x86/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,6 @@ ifdef CONFIG_X86_64
KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
endif

# Speed up the build
KBUILD_CFLAGS += -pipe
# Workaround for a gcc prerelease that unfortunately was shipped in a suse release
KBUILD_CFLAGS += -Wno-sign-compare
#
Expand All @@ -239,7 +237,7 @@ archheaders:
archmacros:
$(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s

ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,-
ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s
export ASM_MACRO_FLAGS
KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)

Expand Down
2 changes: 2 additions & 0 deletions arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am

int mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
int mce_usable_address(struct mce *m);

DECLARE_PER_CPU(unsigned, mce_exception_count);
DECLARE_PER_CPU(unsigned, mce_poll_count);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/mshyperv.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
: "cc");
}
#endif
return hv_status;
return hv_status;
}

/*
Expand Down
12 changes: 7 additions & 5 deletions arch/x86/include/asm/page_64_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@

/*
* Set __PAGE_OFFSET to the most negative possible address +
* PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires.
* PGDIR_SIZE*17 (pgd slot 273).
*
* The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for
* a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary,
* but it's what Xen requires.
*/
#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL)
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL)
#define __PAGE_OFFSET_BASE_L5 _AC(0xff11000000000000, UL)
#define __PAGE_OFFSET_BASE_L4 _AC(0xffff888000000000, UL)

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
Expand Down
4 changes: 1 addition & 3 deletions arch/x86/include/asm/pgtable_64_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,7 @@ extern unsigned int ptrs_per_p4d;
*/
#define MAXMEM (1UL << MAX_PHYSMEM_BITS)

#define LDT_PGD_ENTRY_L4 -3UL
#define LDT_PGD_ENTRY_L5 -112UL
#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
#define LDT_PGD_ENTRY -240UL
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#define LDT_END_ADDR (LDT_BASE_ADDR + PGDIR_SIZE)

Expand Down
6 changes: 4 additions & 2 deletions arch/x86/kernel/cpu/mcheck/mce.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ static void mce_report_event(struct pt_regs *regs)
* be somewhat complicated (e.g. segment offset would require an instruction
* parser). So only support physical addresses up to page granularity for now.
*/
static int mce_usable_address(struct mce *m)
int mce_usable_address(struct mce *m)
{
if (!(m->status & MCI_STATUS_ADDRV))
return 0;
Expand All @@ -505,6 +505,7 @@ static int mce_usable_address(struct mce *m)

return 1;
}
EXPORT_SYMBOL_GPL(mce_usable_address);

bool mce_is_memory_error(struct mce *m)
{
Expand Down Expand Up @@ -534,7 +535,7 @@ bool mce_is_memory_error(struct mce *m)
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);

static bool mce_is_correctable(struct mce *m)
bool mce_is_correctable(struct mce *m)
{
if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
return false;
Expand All @@ -547,6 +548,7 @@ static bool mce_is_correctable(struct mce *m)

return true;
}
EXPORT_SYMBOL_GPL(mce_is_correctable);

static bool cec_add_mce(struct mce *m)
{
Expand Down
11 changes: 11 additions & 0 deletions arch/x86/kernel/cpu/mshyperv.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/i8253.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
Expand Down Expand Up @@ -295,6 +296,16 @@ static void __init ms_hyperv_init_platform(void)
if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason;

/*
* Hyper-V VMs have a PIT emulation quirk such that zeroing the
* counter register during PIT shutdown restarts the PIT. So it
* continues to interrupt @18.2 HZ. Setting i8253_clear_counter
* to false tells pit_shutdown() not to zero the counter so that
* the PIT really is shut down. Generation 2 VMs don't have a PIT,
* and setting this value has no effect.
*/
i8253_clear_counter_on_shutdown = false;

#if IS_ENABLED(CONFIG_HYPERV)
/*
* Setup the hook to get control post apic initialization.
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/cpu/vmware.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s)
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static unsigned long long vmware_sched_clock(void)
static unsigned long long notrace vmware_sched_clock(void)
{
unsigned long long ns;

Expand Down
59 changes: 38 additions & 21 deletions arch/x86/kernel/ldt.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,23 +199,14 @@ static void sanity_check_ldt_mapping(struct mm_struct *mm)
/*
* If PTI is enabled, this maps the LDT into the kernelmode and
* usermode tables for the given mm.
*
* There is no corresponding unmap function. Even if the LDT is freed, we
* leave the PTEs around until the slot is reused or the mm is destroyed.
* This is harmless: the LDT is always in ordinary memory, and no one will
* access the freed slot.
*
* If we wanted to unmap freed LDTs, we'd also need to do a flush to make
* it useful, and the flush would slow down modify_ldt().
*/
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
unsigned long va;
bool is_vmalloc;
spinlock_t *ptl;
pgd_t *pgd;
int i;
int i, nr_pages;

if (!static_cpu_has(X86_FEATURE_PTI))
return 0;
Expand All @@ -229,16 +220,11 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
/* Check if the current mappings are sane */
sanity_check_ldt_mapping(mm);

/*
* Did we already have the top level entry allocated? We can't
* use pgd_none() for this because it doens't do anything on
* 4-level page table kernels.
*/
pgd = pgd_offset(mm, LDT_BASE_ADDR);

is_vmalloc = is_vmalloc_addr(ldt->entries);

for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

for (i = 0; i < nr_pages; i++) {
unsigned long offset = i << PAGE_SHIFT;
const void *src = (char *)ldt->entries + offset;
unsigned long pfn;
Expand Down Expand Up @@ -272,20 +258,50 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
/* Propagate LDT mapping to the user page-table */
map_ldt_struct_to_user(mm);

va = (unsigned long)ldt_slot_va(slot);
flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, PAGE_SHIFT, false);

ldt->slot = slot;
return 0;
}

/*
 * Remove the PTI mapping of @ldt from the LDT remap slot of @mm.
 *
 * Clears the PTE of every page that backs the LDT entries in the slot
 * recorded in ldt->slot, then flushes the TLB for exactly the range that
 * was covered. write_ldt() calls this on the old LDT right before
 * free_ldt_struct(), so the remap area does not keep pointing at pages
 * that are about to be freed.
 *
 * A NULL @ldt is a no-op, as is running without X86_FEATURE_PTI.
 */
static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
	unsigned long va;
	int i, nr_pages;

	if (!ldt)
		return;

	/* LDT map/unmap is only required for PTI */
	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	/* Number of pages occupied by the LDT entries, rounded up. */
	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);

	for (i = 0; i < nr_pages; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		spinlock_t *ptl;
		pte_t *ptep;

		va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
		ptep = get_locked_pte(mm, va, &ptl);

		/*
		 * get_locked_pte() can return NULL (no page table could be
		 * obtained for @va). In that case nothing is mapped at @va
		 * and neither @ptep nor @ptl is valid, so there is nothing to
		 * clear or unlock for this page.
		 */
		if (!ptep)
			continue;

		pte_clear(mm, va, ptep);
		pte_unmap_unlock(ptep, ptl);
	}

	/* Flush only the part of the slot that was actually mapped. */
	va = (unsigned long)ldt_slot_va(ldt->slot);
	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
}

#else /* !CONFIG_PAGE_TABLE_ISOLATION */

/*
 * Stub used when CONFIG_PAGE_TABLE_ISOLATION is disabled: there is nothing
 * to map, so report success (0) unconditionally.
 */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
return 0;
}

/*
 * Stub used when CONFIG_PAGE_TABLE_ISOLATION is disabled: intentionally
 * empty so callers can invoke it unconditionally.
 */
static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
{
}
#endif /* CONFIG_PAGE_TABLE_ISOLATION */

static void free_ldt_pgtables(struct mm_struct *mm)
Expand Down Expand Up @@ -524,6 +540,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
}

install_ldt(mm, new_ldt);
unmap_ldt_struct(mm, old_ldt);
free_ldt_struct(old_ldt);
error = 0;

Expand Down
Loading

0 comments on commit b6df7b6

Please sign in to comment.