x86: Switch between host and guest PAT
Do not allow the guest to mess with the PAT MSR in a way that also
affects the host. This may cause the host to run in uncached mode,
slowing it down, or, even worse, to access MMIO with caches enabled,
which will cause inconsistencies.

On Intel, we have to require and enable the related save/restore
feature. On AMD, we need to intercept the MSR accesses and map them
onto the g_pat field of the VMCB.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
jan-kiszka committed Apr 10, 2015
1 parent 355118a commit f4cdab0
Showing 6 changed files with 40 additions and 20 deletions.
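
Aside (editor's sketch, not part of the commit): the PAT MSR holds eight
byte-wide memory-type entries, and a page's PWT, PCD and PAT page-table bits
select one of them. That is why a PAT shared with the guest changes the
host's effective memory types, e.g. turning an MMIO mapping cacheable. A
minimal lookup sketch, with a hypothetical helper name:

        /* Hypothetical sketch: how a 4K PTE selects its PAT entry.
         * PWT (bit 3) and PCD (bit 4) form index bits 0-1, the PAT bit
         * (bit 7) forms index bit 2; each PAT entry is one byte. */
        static u8 pat_entry_for_pte(u64 pat_msr, u64 pte)
        {
                unsigned int index = ((pte >> 3) & 0x3) | ((pte >> 5) & 0x4);

                return (pat_msr >> (index * 8)) & 0xff;
        }
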
1 change: 1 addition & 0 deletions hypervisor/arch/x86/include/asm/percpu.h
@@ -71,6 +71,7 @@ struct per_cpu {
struct segment linux_fs;
struct segment linux_gs;
struct segment linux_tss;
unsigned long linux_pat;
unsigned long linux_efer;
unsigned long linux_sysenter_cs;
unsigned long linux_sysenter_eip;
2 changes: 2 additions & 0 deletions hypervisor/arch/x86/include/asm/processor.h
@@ -91,6 +91,8 @@
#define FEATURE_CONTROL_LOCKED (1 << 0)
#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1 << 2)

#define PAT_RESET_VALUE 0x0007040600070406UL

#define EFER_LME 0x00000100
#define EFER_LMA 0x00000400
#define EFER_NXE 0x00000800
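
Aside: PAT_RESET_VALUE is the architectural power-on value of IA32_PAT;
entry n sits in byte n, least significant byte first. A decode sketch
(illustration only, not commit code):

        /* 0x0007040600070406 decodes, entry 0 first, to WB (0x06),
         * WT (0x04), UC- (0x07), UC (0x00), then the same four types
         * again for entries 4-7. */
        static const char *pat_type_name(u8 type)
        {
                switch (type) {
                case 0x00: return "UC";
                case 0x04: return "WT";
                case 0x06: return "WB";
                case 0x07: return "UC-";
                default:   return "other";
                }
        }
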
3 changes: 3 additions & 0 deletions hypervisor/arch/x86/include/asm/vmx.h
@@ -210,10 +210,13 @@ enum vmx_state { VMXOFF = 0, VMXON, VMCS_READY };
#define SECONDARY_EXEC_UNRESTRICTED_GUEST (1UL << 7)

#define VM_EXIT_HOST_ADDR_SPACE_SIZE (1UL << 9)
#define VM_EXIT_SAVE_IA32_PAT (1UL << 18)
#define VM_EXIT_LOAD_IA32_PAT (1UL << 19)
#define VM_EXIT_SAVE_IA32_EFER (1UL << 20)
#define VM_EXIT_LOAD_IA32_EFER (1UL << 21)

#define VM_ENTRY_IA32E_MODE (1UL << 9)
#define VM_ENTRY_LOAD_IA32_PAT (1UL << 14)
#define VM_ENTRY_LOAD_IA32_EFER (1UL << 15)

#define VMX_MISC_ACTIVITY_HLT (1UL << 6)
4 changes: 4 additions & 0 deletions hypervisor/arch/x86/setup.c
@@ -193,6 +193,9 @@ int arch_cpu_init(struct per_cpu *cpu_data)
cpu_data->linux_cr3 = read_cr3();
write_cr3(paging_hvirt2phys(hv_paging_structs.root_table));

cpu_data->linux_pat = read_msr(MSR_IA32_PAT);
write_msr(MSR_IA32_PAT, PAT_RESET_VALUE);

cpu_data->linux_efer = read_msr(MSR_EFER);

cpu_data->linux_sysenter_cs = read_msr(MSR_IA32_SYSENTER_CS);
@@ -253,6 +256,7 @@ void arch_cpu_restore(struct per_cpu *cpu_data, int return_code)

vcpu_exit(cpu_data);

write_msr(MSR_IA32_PAT, cpu_data->linux_pat);
write_msr(MSR_EFER, cpu_data->linux_efer);
write_cr0(cpu_data->linux_cr0);
write_cr3(cpu_data->linux_cr3);
29 changes: 16 additions & 13 deletions hypervisor/arch/x86/svm.c
@@ -41,8 +41,6 @@

#define MTRR_DEFTYPE 0x2ff

#define PAT_RESET_VALUE 0x0007040600070406UL

static bool has_avic, has_assists, has_flush_by_asid;

static const struct segment invalid_seg;
@@ -55,7 +53,9 @@ static u8 __attribute__((aligned(PAGE_SIZE))) msrpm[][0x2000/4] = {
[ SVM_MSRPM_0000 ] = {
[ 0/4 ... 0x017/4 ] = 0,
[ 0x018/4 ... 0x01b/4 ] = 0x80, /* 0x01b (w) */
[ 0x01c/4 ... 0x2fb/4 ] = 0,
[ 0x01c/4 ... 0x273/4 ] = 0,
[ 0x274/4 ... 0x277/4 ] = 0xc0, /* 0x277 (rw) */
[ 0x278/4 ... 0x2fb/4 ] = 0,
[ 0x2fc/4 ... 0x2ff/4 ] = 0x80, /* 0x2ff (w) */
[ 0x300/4 ... 0x7ff/4 ] = 0,
/* x2APIC MSRs - emulated if not present */
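
Aside on the bitmap encoding (my reading of the APM, not commit code): each
MSR owns two MSRPM bits, the even bit for read intercept and the odd bit for
write intercept, so four MSRs share one byte. That puts IA32_PAT (0x277) at
byte 0x277/4 = 0x9d with mask 0xc0, matching the new initializer above:

        /* Sketch: MSRPM position of an MSR within its 2K vector. */
        static void msrpm_pos(u32 msr, u32 *byte, u8 *rd_bit, u8 *wr_bit)
        {
                unsigned int shift = (msr & 3) * 2;

                *byte = (msr & 0x1fff) / 4;
                *rd_bit = 1 << shift; /* set: intercept reads */
                *wr_bit = 2 << shift; /* set: intercept writes */
        }
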
@@ -216,7 +216,7 @@ static int vmcb_setup(struct per_cpu *cpu_data)
vmcb->efer = (cpu_data->linux_efer | EFER_SVME);

/* Linux uses custom PAT setting */
vmcb->g_pat = read_msr(MSR_IA32_PAT);
vmcb->g_pat = cpu_data->linux_pat;

vmcb->general1_intercepts |= GENERAL1_INTERCEPT_NMI;
vmcb->general1_intercepts |= GENERAL1_INTERCEPT_CR0_SEL_WRITE;
@@ -446,14 +446,6 @@ void __attribute__((noreturn)) vcpu_activate_vmm(struct per_cpu *cpu_data)
vmcb_pa = paging_hvirt2phys(&cpu_data->vmcb);
host_stack = (unsigned long)cpu_data->stack + sizeof(cpu_data->stack);

/*
* XXX: Jailhouse doesn't use PAT, so it is explicitly set to the
* reset value. However, this value is later combined with vmcb->g_pat
* (as per APMv2, Sect. 15.25.8) which may lead to subtle bugs as the
* actual memory type might slightly differ from what Linux expects.
*/
write_msr(MSR_IA32_PAT, PAT_RESET_VALUE);

/* We enter Linux at the point arch_entry would return to as well.
* rax is cleared to signal success to the caller. */
asm volatile(
@@ -498,7 +490,6 @@
write_msr(MSR_CSTAR, vmcb->cstar);
write_msr(MSR_SFMASK, vmcb->sfmask);
write_msr(MSR_KERNGS_BASE, vmcb->kerngsbase);
write_msr(MSR_IA32_PAT, vmcb->g_pat);

cpu_data->linux_cr0 = vmcb->cr0;
cpu_data->linux_cr3 = vmcb->cr3;
@@ -512,6 +503,7 @@ vcpu_deactivate_vmm(struct registers *guest_regs)

cpu_data->linux_tss.selector = vmcb->tr.selector;

cpu_data->linux_pat = vmcb->g_pat;
cpu_data->linux_efer = vmcb->efer & (~EFER_SVME);
cpu_data->linux_fs.base = vmcb->fs.base;
cpu_data->linux_gs.base = vmcb->gs.base;
@@ -834,6 +826,11 @@ static bool svm_handle_msr_read(struct registers *guest_regs,
vcpu_skip_emulated_instruction(X86_INST_LEN_RDMSR);
x2apic_handle_read(guest_regs);
return true;
} else if (guest_regs->rcx == MSR_IA32_PAT) {
vcpu_skip_emulated_instruction(X86_INST_LEN_RDMSR);
guest_regs->rax = cpu_data->vmcb.g_pat & 0xffffffff;
guest_regs->rdx = cpu_data->vmcb.g_pat >> 32;
return true;
} else {
panic_printk("FATAL: Unhandled MSR read: %x\n",
guest_regs->rcx);
@@ -853,6 +850,12 @@ static bool svm_handle_msr_write(struct registers *guest_regs,
result = x2apic_handle_write(guest_regs, cpu_data);
goto out;
}
if (guest_regs->rcx == MSR_IA32_PAT) {
vmcb->g_pat = (guest_regs->rax & 0xffffffff) |
(guest_regs->rdx << 32);
vmcb->clean_bits &= ~CLEAN_BITS_NP;
goto out;
}
if (guest_regs->rcx == MSR_EFER) {
/* Never let a guest to disable SVME; see APMv2, Sect. 3.1.7 */
efer = (guest_regs->rax & 0xffffffff) |
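
Aside: both handlers above (de)compose the 64-bit MSR value from the rax/rdx
halves, since RDMSR/WRMSR transfer it via EDX:EAX. Hypothetical helpers
capturing the pattern:

        /* Hypothetical helpers mirroring the RDMSR/WRMSR split: the
         * 64-bit value travels in the low 32 bits of rdx:rax. */
        static inline u64 msr_regs_to_value(unsigned long rax,
                                            unsigned long rdx)
        {
                return (rax & 0xffffffff) | (rdx << 32);
        }

        static inline void msr_value_to_regs(u64 val, unsigned long *rax,
                                             unsigned long *rdx)
        {
                *rax = val & 0xffffffff;
                *rdx = val >> 32;
        }
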
21 changes: 14 additions & 7 deletions hypervisor/arch/x86/vmx.c
@@ -232,10 +232,13 @@ static int vmx_check_features(void)
return trace_error(-EIO);
}

/* require EFER save/restore */
/* require PAT and EFER save/restore */
vmx_entry_ctrl = read_msr(MSR_IA32_VMX_ENTRY_CTLS) >> 32;
vmx_exit_ctrl = read_msr(MSR_IA32_VMX_EXIT_CTLS) >> 32;
if (!(vmx_entry_ctrl & VM_ENTRY_LOAD_IA32_EFER) ||
if (!(vmx_entry_ctrl & VM_ENTRY_LOAD_IA32_PAT) ||
!(vmx_entry_ctrl & VM_ENTRY_LOAD_IA32_EFER) ||
!(vmx_exit_ctrl & VM_EXIT_SAVE_IA32_PAT) ||
!(vmx_exit_ctrl & VM_EXIT_LOAD_IA32_PAT) ||
!(vmx_exit_ctrl & VM_EXIT_SAVE_IA32_EFER) ||
!(vmx_exit_ctrl & VM_EXIT_LOAD_IA32_EFER))
return trace_error(-EIO);
@@ -472,6 +475,7 @@ static bool vmcs_setup(struct per_cpu *cpu_data)
read_idtr(&dtr);
ok &= vmcs_write64(HOST_IDTR_BASE, dtr.base);

ok &= vmcs_write64(HOST_IA32_PAT, read_msr(MSR_IA32_PAT));
ok &= vmcs_write64(HOST_IA32_EFER, EFER_LMA | EFER_LME);

ok &= vmcs_write32(HOST_IA32_SYSENTER_CS, 0);
@@ -520,10 +524,9 @@ static bool vmcs_setup(struct per_cpu *cpu_data)
ok &= vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
ok &= vmcs_write64(GUEST_PENDING_DBG_EXCEPTIONS, 0);

ok &= vmcs_write64(GUEST_IA32_PAT, cpu_data->linux_pat);
ok &= vmcs_write64(GUEST_IA32_EFER, cpu_data->linux_efer);

// TODO: switch PAT */

ok &= vmcs_write64(VMCS_LINK_POINTER, -1UL);
ok &= vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);

@@ -555,16 +558,18 @@ static bool vmcs_setup(struct per_cpu *cpu_data)
ok &= vmcs_write32(EXCEPTION_BITMAP, 0);

val = read_msr(MSR_IA32_VMX_EXIT_CTLS);
val |= VM_EXIT_HOST_ADDR_SPACE_SIZE | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_LOAD_IA32_EFER;
val |= VM_EXIT_HOST_ADDR_SPACE_SIZE |
VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
VM_EXIT_SAVE_IA32_EFER | VM_EXIT_LOAD_IA32_EFER;
ok &= vmcs_write32(VM_EXIT_CONTROLS, val);

ok &= vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
ok &= vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
ok &= vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);

val = read_msr(MSR_IA32_VMX_ENTRY_CTLS);
val |= VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER;
val |= VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_PAT |
VM_ENTRY_LOAD_IA32_EFER;
ok &= vmcs_write32(VM_ENTRY_CONTROLS, val);

ok &= vmcs_write64(CR4_GUEST_HOST_MASK, 0);
@@ -704,6 +709,7 @@ vcpu_deactivate_vmm(struct registers *guest_regs)

cpu_data->linux_tss.selector = vmcs_read32(GUEST_TR_SELECTOR);

cpu_data->linux_pat = vmcs_read64(GUEST_IA32_PAT);
cpu_data->linux_efer = vmcs_read64(GUEST_IA32_EFER);
cpu_data->linux_fs.base = vmcs_read64(GUEST_FS_BASE);
cpu_data->linux_gs.base = vmcs_read64(GUEST_GS_BASE);
@@ -811,6 +817,7 @@ static void vmx_vcpu_reset(unsigned int sipi_vector)
ok &= vmcs_write64(GUEST_IDTR_BASE, 0);
ok &= vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);

ok &= vmcs_write64(GUEST_IA32_PAT, PAT_RESET_VALUE);
ok &= vmcs_write64(GUEST_IA32_EFER, 0);

ok &= vmcs_write32(GUEST_SYSENTER_CS, 0);
