Skip to content

Commit

Permalink
x86: Emulate interrupt remapping support to enable x2APIC usage
Browse files Browse the repository at this point in the history
If we want to use x2APIC on real hardware (virtual machines do not have
this limitation), interrupt remapping has to be enabled. As we take over
hardware control from Linux, we either have to switch the APIC modes on
handover (tricky specifically for x2APIC->xAPIC) or let Linux boot with
interrupt remapping already enabled. We choose the latter way as the
cleaner one that also allows us to run Linux without xAPIC emulation
(non-root cells are expected to use the x2APIC unconditionally).

IR emulation requires both the interpretation of the interrupt remapping
table that Linux uses (vtd_get_remapped_root_int) as well as basic
queued invalidation emulation (vtd_emulate_qi_request). We also need to
handle FSTS register reads, but we simply return 0 here and let
Jailhouse report all faults.

Physical addresses provided by Linux via registers and data structures
are mapped on demand into the hypervisor. This avoids creating a static
mapping that depends on Linux-controlled parameters (would be bad for
check-summing). This way, we also make sure that the addressed memory
still belongs to Linux.

Returning IR and QI to Linux is more complex than stealing them because
reloading the overwritten registers with their original values is not
sufficient: the Invalidation Queue Head cannot be set by software.
Instead, we need to inject dummy invalidation wait requests until the
hardware reaches the value Linux expects.

Note that this IR emulation feature is solely designed to be used by the
root cell. Non-root cells have to continue to program the virtualized
interrupt registers of assigned devices.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
  • Loading branch information
jan-kiszka committed Aug 28, 2014
1 parent e2c14a2 commit 20b09b8
Show file tree
Hide file tree
Showing 8 changed files with 374 additions and 25 deletions.
9 changes: 6 additions & 3 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,13 @@ currently:
(except when running inside QEMU)
- at least 2 logical CPUs
- x86-64 Linux kernel (tested against >= 3.9)
- VT-d usage has to be disabled in the Linux kernel, e.g. via command
line parameters:
- VT-d IOMMU usage (DMAR) has to be disabled in the Linux kernel, e.g. via
the command line parameter:

intel_iommu=off intremap=off
intel_iommu=off

- To exploit the faster x2APIC, interrupt remapping needs to be on in the
kernel (check for CONFIG_IRQ_REMAP)


Build
Expand Down
1 change: 1 addition & 0 deletions hypervisor/arch/x86/include/asm/apic.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ struct apic_irq_message {
u8 dest_logical:1;
u8 level_triggered:1;
u8 redir_hint:1;
u8 valid:1;
u32 destination;
};

Expand Down
1 change: 1 addition & 0 deletions hypervisor/arch/x86/include/asm/cell.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ struct cell {

struct {
struct paging_structures pg_structs;
bool ir_emulation;
} vtd;

unsigned int id;
Expand Down
19 changes: 19 additions & 0 deletions hypervisor/arch/x86/include/asm/vtd.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct vtd_entry {
#define VTD_ECAP_REG 0x10
# define VTD_ECAP_QI (1UL << 1)
# define VTD_ECAP_IR (1UL << 3)
# define VTD_ECAP_EIM (1UL << 4)
#define VTD_GCMD_REG 0x18
# define VTD_GCMD_SIRTP (1UL << 24)
# define VTD_GCMD_IRE (1UL << 25)
Expand Down Expand Up @@ -85,10 +86,19 @@ struct vtd_entry {
#define VTD_PLMLIMIT_REG 0x6c
#define VTD_PHMBASE_REG 0x70
#define VTD_PHMLIMIT_REG 0x78
#define VTD_IQH_REG 0x80
# define VTD_IQH_QH_SHIFT 4
#define VTD_IQT_REG 0x88
# define VTD_IQT_QT_MASK BIT_MASK(18, 4)
# define VTD_IQT_QT_SHIFT 4
#define VTD_IQA_REG 0x90
# define VTD_IQA_ADDR_MASK BIT_MASK(63, 12)
#define VTD_IRTA_REG 0xb8
# define VTD_IRTA_SIZE_MASK BIT_MASK(3, 0)
# define VTD_IRTA_EIME (1UL << 11)
# define VTD_IRTA_ADDR_MASK BIT_MASK(63, 12)

#define VTD_REQ_INV_MASK BIT_MASK(3, 0)

#define VTD_REQ_INV_CONTEXT 0x01
# define VTD_INV_CONTEXT_GLOBAL (1UL << 4)
Expand All @@ -105,9 +115,13 @@ struct vtd_entry {
#define VTD_REQ_INV_INT 0x04
# define VTD_INV_INT_GLOBAL (0UL << 4)
# define VTD_INV_INT_INDEX (1UL << 4)
# define VTD_INV_INT_IM_MASK BIT_MASK(31, 27)
# define VTD_INV_INT_IM_SHIFT 27
# define VTD_INV_INT_IIDX_MASK BIT_MASK(47, 32)
# define VTD_INV_INT_IIDX_SHIFT 32

#define VTD_REQ_INV_WAIT 0x05
#define VTD_INV_WAIT_IF (1UL << 4)
#define VTD_INV_WAIT_SW (1UL << 5)
#define VTD_INV_WAIT_FN (1UL << 6)
#define VTD_INV_WAIT_SDATA_SHIFT 32
Expand Down Expand Up @@ -155,6 +169,9 @@ int vtd_unmap_memory_region(struct cell *cell,
const struct jailhouse_memory *mem);
int vtd_add_pci_device(struct cell *cell, struct pci_device *device);
void vtd_remove_pci_device(struct pci_device *device);
struct apic_irq_message
vtd_get_remapped_root_int(unsigned int iommu, u16 device_id,
unsigned int vector, unsigned int remap_index);
int vtd_map_interrupt(struct cell *cell, u16 device_id, unsigned int vector,
struct apic_irq_message irq_msg);
void vtd_cell_exit(struct cell *cell);
Expand All @@ -164,3 +181,5 @@ void vtd_config_commit(struct cell *cell_added_removed);
void vtd_shutdown(void);

void vtd_check_pending_faults(struct per_cpu *cpu_data);

int vtd_mmio_access_handler(bool is_write, u64 addr, u32 *value);
35 changes: 26 additions & 9 deletions hypervisor/arch/x86/ioapic.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,30 @@ static void ioapic_reg_write(unsigned int reg, u32 value)
}

static struct apic_irq_message
ioapic_translate_redir_entry(union ioapic_redir_entry entry)
ioapic_translate_redir_entry(struct cell *cell, unsigned int pin,
union ioapic_redir_entry entry)
{
struct apic_irq_message irq_msg;
struct apic_irq_message irq_msg = { .valid = 0 };
unsigned int idx;

if (cell->vtd.ir_emulation) {
if (!entry.remap.remapped)
return irq_msg;

idx = entry.remap.int_index | (entry.remap.int_index15 << 15);

return vtd_get_remapped_root_int(root_cell.ioapic_iommu,
root_cell.ioapic_id, pin,
idx);
}

irq_msg.vector = entry.native.vector;
irq_msg.delivery_mode = entry.native.delivery_mode;
irq_msg.level_triggered = entry.native.level_triggered;
irq_msg.dest_logical = entry.native.dest_logical;
/* align redir_hint and dest_logical - required by vtd_map_interrupt */
irq_msg.redir_hint = irq_msg.dest_logical;
irq_msg.valid = 1;
irq_msg.destination = entry.native.destination;

return irq_msg;
Expand Down Expand Up @@ -99,7 +113,7 @@ static int ioapic_virt_redir_write(struct cell *cell, unsigned int reg,
return 0;
}

irq_msg = ioapic_translate_redir_entry(entry);
irq_msg = ioapic_translate_redir_entry(cell, pin, entry);

result = vtd_map_interrupt(cell, cell->ioapic_id, pin, irq_msg);
// HACK for QEMU
Expand All @@ -119,7 +133,8 @@ static int ioapic_virt_redir_write(struct cell *cell, unsigned int reg,
return 0;
}

static void ioapic_mask_pins(u64 pin_bitmap, enum ioapic_handover handover)
static void ioapic_mask_pins(struct cell *cell, u64 pin_bitmap,
enum ioapic_handover handover)
{
union ioapic_redir_entry entry;
unsigned int pin, reg;
Expand All @@ -145,7 +160,8 @@ static void ioapic_mask_pins(u64 pin_bitmap, enum ioapic_handover handover)
* interrupts.
*/
entry = shadow_redir_table[pin];
apic_send_irq(ioapic_translate_redir_entry(entry));
apic_send_irq(ioapic_translate_redir_entry(cell, pin,
entry));
}
}
}
Expand Down Expand Up @@ -201,9 +217,9 @@ void ioapic_prepare_handover(void)
return;
if (irqchip) {
pin_bitmap = irqchip->pin_bitmap;
ioapic_mask_pins(pin_bitmap, PINS_ACTIVE);
ioapic_mask_pins(&root_cell, pin_bitmap, PINS_ACTIVE);
}
ioapic_mask_pins(~pin_bitmap, PINS_MASKED);
ioapic_mask_pins(&root_cell, ~pin_bitmap, PINS_MASKED);
}

void ioapic_cell_init(struct cell *cell)
Expand All @@ -218,7 +234,8 @@ void ioapic_cell_init(struct cell *cell)

if (cell != &root_cell) {
root_cell.ioapic_pin_bitmap &= ~irqchip->pin_bitmap;
ioapic_mask_pins(irqchip->pin_bitmap, PINS_MASKED);
ioapic_mask_pins(cell, irqchip->pin_bitmap,
PINS_MASKED);
}
}
}
Expand All @@ -233,7 +250,7 @@ void ioapic_cell_exit(struct cell *cell)
if (!cell_irqchip)
return;

ioapic_mask_pins(cell_irqchip->pin_bitmap, PINS_MASKED);
ioapic_mask_pins(cell, cell_irqchip->pin_bitmap, PINS_MASKED);
if (root_irqchip)
root_cell.ioapic_pin_bitmap |= cell_irqchip->pin_bitmap &
root_irqchip->pin_bitmap;
Expand Down
16 changes: 15 additions & 1 deletion hypervisor/arch/x86/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,20 @@ static struct apic_irq_message
pci_translate_msi_vector(struct pci_device *device, unsigned int vector,
unsigned int legacy_vectors, union x86_msi_vector msi)
{
struct apic_irq_message irq_msg;
struct apic_irq_message irq_msg = { .valid = 0 };
unsigned int idx;

if (device->cell->vtd.ir_emulation) {
if (!msi.remap.remapped)
return irq_msg;

idx = msi.remap.int_index | (msi.remap.int_index15 << 15);
if (msi.remap.shv)
idx += msi.remap.subhandle;
return vtd_get_remapped_root_int(device->info->iommu,
device->info->bdf,
vector, idx);
}

irq_msg.vector = msi.native.vector;
if (legacy_vectors > 1) {
Expand All @@ -254,6 +267,7 @@ pci_translate_msi_vector(struct pci_device *device, unsigned int vector,
irq_msg.level_triggered = 0;
irq_msg.dest_logical = msi.native.dest_logical;
irq_msg.redir_hint = msi.native.redir_hint;
irq_msg.valid = 1;
irq_msg.destination = msi.native.destination;

return irq_msg;
Expand Down
2 changes: 2 additions & 0 deletions hypervisor/arch/x86/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1074,6 +1074,8 @@ static bool vmx_handle_ept_violation(struct registers *guest_regs,
if (result == 0)
result = pci_mmio_access_handler(cpu_data->cell, is_write,
phys_addr, &val);
if (result == 0)
result = vtd_mmio_access_handler(is_write, phys_addr, &val);

if (result == 1) {
if (!is_write)
Expand Down
Loading

0 comments on commit 20b09b8

Please sign in to comment.