Skip to content

Commit

Permalink
nEPT: Nested INVEPT
Browse files Browse the repository at this point in the history
If we let L1 use EPT, we should probably also support the INVEPT instruction.

In our current nested EPT implementation, when L1 changes its EPT table
for L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in
the course of this modification already calls INVEPT. But if last level
of shadow page is unsync not all L1's changes to EPT12 are intercepted,
which means roots need to be synced when L1 calls INVEPT. Global INVEPT
should not be different since roots are synced by kvm_mmu_load() each
time EPTP02 changes.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
Nadav Har'El authored and bonzini committed Aug 7, 2013
1 parent 155a97a commit bfd0a56
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 0 deletions.
2 changes: 2 additions & 0 deletions arch/x86/include/asm/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -387,13 +387,15 @@ enum vmcs_field {
#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_EPT_EXTENT_CONTEXT 1
#define VMX_EPT_EXTENT_GLOBAL 2
#define VMX_EPT_EXTENT_SHIFT 24

#define VMX_EPT_EXECUTE_ONLY_BIT (1ull)
#define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6)
#define VMX_EPTP_UC_BIT (1ull << 8)
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
#define VMX_EPT_INVEPT_BIT (1ull << 20)
#define VMX_EPT_AD_BIT (1ull << 21)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/uapi/asm/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
#define EXIT_REASON_EOI_INDUCED 45
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
#define EXIT_REASON_PREEMPTION_TIMER 52
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -3182,6 +3182,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
mmu_sync_roots(vcpu);
spin_unlock(&vcpu->kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);

static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
u32 access, struct x86_exception *exception)
Expand Down Expand Up @@ -3451,6 +3452,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
++vcpu->stat.tlb_flush;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);

static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
Expand Down
72 changes: 72 additions & 0 deletions arch/x86/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,7 @@ static void nested_release_page_clean(struct page *page)
kvm_release_page_clean(page);
}

static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
Expand Down Expand Up @@ -2161,6 +2162,7 @@ static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
static u32 nested_vmx_misc_low, nested_vmx_misc_high;
static u32 nested_vmx_ept_caps;
static __init void nested_vmx_setup_ctls_msrs(void)
{
/*
Expand Down Expand Up @@ -6279,6 +6281,74 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return 1;
}

/* Emulate the INVEPT instruction */
static int handle_invept(struct kvm_vcpu *vcpu)
{
u32 vmx_instruction_info, types;
unsigned long type;
gva_t gva;
struct x86_exception e;
struct {
u64 eptp, gpa;
} operand;
u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK;

if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
!(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}

if (!nested_vmx_check_permission(vcpu))
return 1;

if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}

vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);

types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;

if (!(types & (1UL << type))) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return 1;
}

/* According to the Intel VMX instruction reference, the memory
* operand is read even if it isn't needed (e.g., for type==global)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmx_instruction_info, &gva))
return 1;
if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}

switch (type) {
case VMX_EPT_EXTENT_CONTEXT:
if ((operand.eptp & eptp_mask) !=
(nested_ept_get_cr3(vcpu) & eptp_mask))
break;
case VMX_EPT_EXTENT_GLOBAL:
kvm_mmu_sync_roots(vcpu);
kvm_mmu_flush_tlb(vcpu);
nested_vmx_succeed(vcpu);
break;
default:
BUG_ON(1);
break;
}

skip_emulated_instruction(vcpu);
return 1;
}

/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
Expand Down Expand Up @@ -6323,6 +6393,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
[EXIT_REASON_INVEPT] = handle_invept,
};

static const int kvm_vmx_max_exit_handlers =
Expand Down Expand Up @@ -6549,6 +6620,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
case EXIT_REASON_INVEPT:
/*
* VMX instructions trap unconditionally. This allows L1 to
* emulate them for its L2 guest, i.e., allows 3-level nesting!
Expand Down

0 comments on commit bfd0a56

Please sign in to comment.