diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 5a3a2ae52b5df..134d2a9c0f7ed 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -13,7 +13,7 @@ extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); extern void amd_iommu_restart_ga_log(struct amd_iommu *iommu); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); @@ -21,6 +21,7 @@ extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); extern int amd_iommu_init_api(void); +extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); #ifdef CONFIG_AMD_IOMMU_DEBUGFS void amd_iommu_debugfs_setup(struct amd_iommu *iommu); @@ -116,11 +117,18 @@ void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) amd_iommu_domain_set_pt_root(domain, 0); } +static inline int get_pci_sbdf_id(struct pci_dev *pdev) +{ + int seg = pci_domain_nr(pdev->bus); + u16 devid = pci_dev_id(pdev); + + return PCI_SEG_DEVID_TO_SBDF(seg, devid); +} extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev); -extern int __init add_special_device(u8 type, u8 id, u16 *devid, +extern int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line); #ifdef CONFIG_DMI @@ -131,4 +139,5 @@ static inline void amd_iommu_apply_ivrs_quirks(void) { } extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); +extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 111c4cad5d9b5..dae145780ed5f 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -443,14 +443,22 @@ struct irq_remap_table { u32 *table; }; -extern struct irq_remap_table **irq_lookup_table; - /* Interrupt remapping feature used? */ extern bool amd_iommu_irq_remap; /* kmem_cache to get tables with 128 byte alignement */ extern struct kmem_cache *amd_iommu_irq_cache; +#define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff) +#define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff) +#define PCI_SEG_DEVID_TO_SBDF(seg, devid) ((((u32)(seg) & 0xffff) << 16) | \ + ((devid) & 0xffff)) + +/* Make iterating over all pci segment easier */ +#define for_each_pci_segment(pci_seg) \ + list_for_each_entry((pci_seg), &amd_iommu_pci_seg_list, list) +#define for_each_pci_segment_safe(pci_seg, next) \ + list_for_each_entry_safe((pci_seg), (next), &amd_iommu_pci_seg_list, list) /* * Make iterating over all IOMMUs easier */ @@ -473,13 +481,14 @@ extern struct kmem_cache *amd_iommu_irq_cache; struct amd_iommu_fault { u64 address; /* IO virtual address of the fault*/ u32 pasid; /* Address space identifier */ - u16 device_id; /* Originating PCI device id */ + u32 sbdf; /* Originating PCI device id */ u16 tag; /* PPR tag */ u16 flags; /* Fault flags */ }; +struct amd_iommu; struct iommu_domain; struct irq_domain; struct amd_irte_ops; @@ -525,6 +534,75 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; +/* + * This structure contains information about one PCI segment in the system. + */ +struct amd_iommu_pci_seg { + /* List with all PCI segments in the system */ + struct list_head list; + + /* List of all available dev_data structures */ + struct llist_head dev_data_list; + + /* PCI segment number */ + u16 id; + + /* Largest PCI device id we expect translation requests for */ + u16 last_bdf; + + /* Size of the device table */ + u32 dev_table_size; + + /* Size of the alias table */ + u32 alias_table_size; + + /* Size of the rlookup table */ + u32 rlookup_table_size; + + /* + * device table virtual address + * + * Pointer to the per PCI segment device table. + * It is indexed by the PCI device id or the HT unit id and contains + * information about the domain the device belongs to as well as the + * page table root pointer. + */ + struct dev_table_entry *dev_table; + + /* + * The rlookup iommu table is used to find the IOMMU which is + * responsible for a specific device. It is indexed by the PCI + * device id. + */ + struct amd_iommu **rlookup_table; + + /* + * This table is used to find the irq remapping table for a given + * device id quickly. + */ + struct irq_remap_table **irq_lookup_table; + + /* + * Pointer to a device table which the content of old device table + * will be copied to. It's only be used in kdump kernel. + */ + struct dev_table_entry *old_dev_tbl_cpy; + + /* + * The alias table is a driver specific data structure which contains the + * mappings of the PCI device ids to the actual requestor ids on the IOMMU. + * More than one device can share the same requestor id. + */ + u16 *alias_table; + + /* + * A list of required unity mappings we find in ACPI. It is not locked + * because as runtime it is only read. It is created at ACPI table + * parsing time. + */ + struct list_head unity_map; +}; + /* * Structure where we save information about one hardware AMD IOMMU in the * system. @@ -576,7 +654,7 @@ struct amd_iommu { u16 cap_ptr; /* pci domain of this IOMMU */ - u16 pci_seg; + struct amd_iommu_pci_seg *pci_seg; /* start of exclusion range of that IOMMU */ u64 exclusion_start; @@ -664,8 +742,8 @@ struct acpihid_map_entry { struct list_head list; u8 uid[ACPIHID_UID_LEN]; u8 hid[ACPIHID_HID_LEN]; - u16 devid; - u16 root_devid; + u32 devid; + u32 root_devid; bool cmd_line; struct iommu_group *group; }; @@ -673,7 +751,7 @@ struct acpihid_map_entry { struct devid_map { struct list_head list; u8 id; - u16 devid; + u32 devid; bool cmd_line; }; @@ -687,7 +765,7 @@ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct llist_node dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ - struct pci_dev *pdev; + struct device *dev; u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ struct { @@ -707,6 +785,12 @@ extern struct list_head ioapic_map; extern struct list_head hpet_map; extern struct list_head acpihid_map; +/* + * List with all PCI segments in the system. This list is not locked because + * it is only written at driver initialization time + */ +extern struct list_head amd_iommu_pci_seg_list; + /* * List with all IOMMUs in the system. This list is not locked because it is * only written and read at driver initialization or suspend time @@ -746,39 +830,13 @@ struct unity_map_entry { int prot; }; -/* - * List of all unity mappings. It is not locked because as runtime it is only - * read. It is created at ACPI table parsing time. - */ -extern struct list_head amd_iommu_unity_map; - /* * Data structures for device handling */ -/* - * Device table used by hardware. Read and write accesses by software are - * locked with the amd_iommu_pd_table lock. - */ -extern struct dev_table_entry *amd_iommu_dev_table; - -/* - * Alias table to find requestor ids to device ids. Not locked because only - * read on runtime. - */ -extern u16 *amd_iommu_alias_table; - -/* - * Reverse lookup table to find the IOMMU which translates a specific device. - */ -extern struct amd_iommu **amd_iommu_rlookup_table; - /* size of the dma_ops aperture as power of 2 */ extern unsigned amd_iommu_aperture_order; -/* largest PCI device id we expect translation requests for */ -extern u16 amd_iommu_last_bdf; - /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; @@ -911,6 +969,7 @@ struct irq_2_irte { struct amd_ir_data { u32 cached_ga_tag; + struct amd_iommu *iommu; struct irq_2_irte irq_2_irte; struct msi_msg msi_entry; void *entry; /* Pointer to union irte or struct irte_ga */ @@ -927,9 +986,9 @@ struct amd_ir_data { struct amd_irte_ops { void (*prepare)(void *, u32, bool, u8, u32, int); - void (*activate)(void *, u16, u16); - void (*deactivate)(void *, u16, u16); - void (*set_affinity)(void *, u16, u16, u8, u32); + void (*activate)(struct amd_iommu *iommu, void *, u16, u16); + void (*deactivate)(struct amd_iommu *iommu, void *, u16, u16); + void (*set_affinity)(struct amd_iommu *iommu, void *, u16, u16, u8, u32); void *(*get)(struct irq_remap_table *, int); void (*set_allocated)(struct irq_remap_table *, int); bool (*is_allocated)(struct irq_remap_table *, int); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index fafb7bd798394..85e001743b8a5 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -144,7 +144,8 @@ struct ivmd_header { u16 length; u16 devid; u16 aux; - u64 resv; + u16 pci_seg; + u8 resv[6]; u64 range_start; u64 range_length; } __attribute__((packed)); @@ -163,11 +164,7 @@ static bool amd_iommu_force_enable __initdata; static bool amd_iommu_irtcachedis; static int amd_iommu_target_ivhd_type; -u16 amd_iommu_last_bdf; /* largest PCI device id we have - to handle */ -LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings - we find in ACPI */ - +LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -188,48 +185,12 @@ static bool amd_iommu_pc_present __read_mostly; bool amd_iommu_force_isolation __read_mostly; -/* - * Pointer to the device table which is shared by all AMD IOMMUs - * it is indexed by the PCI device id or the HT unit id and contains - * information about the domain the device belongs to as well as the - * page table root pointer. - */ -struct dev_table_entry *amd_iommu_dev_table; -/* - * Pointer to a device table which the content of old device table - * will be copied to. It's only be used in kdump kernel. - */ -static struct dev_table_entry *old_dev_tbl_cpy; - -/* - * The alias table is a driver specific data structure which contains the - * mappings of the PCI device ids to the actual requestor ids on the IOMMU. - * More than one device can share the same requestor id. - */ -u16 *amd_iommu_alias_table; - -/* - * The rlookup table is used to find the IOMMU which is responsible - * for a specific device. It is also indexed by the PCI device id. - */ -struct amd_iommu **amd_iommu_rlookup_table; - -/* - * This table is used to find the irq remapping table for a given device id - * quickly. - */ -struct irq_remap_table **irq_lookup_table; - /* * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. */ unsigned long *amd_iommu_pd_alloc_bitmap; -static u32 dev_table_size; /* size of the device table */ -static u32 alias_table_size; /* size of the alias table */ -static u32 rlookup_table_size; /* size if the rlookup table */ - enum iommu_init_state { IOMMU_START_STATE, IOMMU_IVRS_DETECTED, @@ -259,7 +220,7 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); -static void init_device_table_dma(void); +static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); static bool amd_iommu_pre_enabled = true; @@ -284,16 +245,10 @@ static void init_translation_status(struct amd_iommu *iommu) iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; } -static inline void update_last_devid(u16 devid) -{ - if (devid > amd_iommu_last_bdf) - amd_iommu_last_bdf = devid; -} - -static inline unsigned long tbl_size(int entry_size) +static inline unsigned long tbl_size(int entry_size, int last_bdf) { unsigned shift = PAGE_SHIFT + - get_order(((int)amd_iommu_last_bdf + 1) * entry_size); + get_order((last_bdf + 1) * entry_size); return 1UL << shift; } @@ -422,10 +377,12 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) static void iommu_set_device_table(struct amd_iommu *iommu) { u64 entry; + u32 dev_table_size = iommu->pci_seg->dev_table_size; + void *dev_table = (void *)get_dev_table(iommu); BUG_ON(iommu->mmio_base == NULL); - entry = iommu_virt_to_phys(amd_iommu_dev_table); + entry = iommu_virt_to_phys(dev_table); entry |= (dev_table_size >> 12) - 1; memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, &entry, sizeof(entry)); @@ -561,6 +518,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) { u8 *p = (void *)h, *end = (void *)h; struct ivhd_entry *dev; + int last_devid = -EINVAL; u32 ivhd_size = get_ivhd_header_size(h); @@ -577,14 +535,14 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) switch (dev->type) { case IVHD_DEV_ALL: /* Use maximum BDF value for DEV_ALL */ - update_last_devid(0xffff); - break; + return 0xffff; case IVHD_DEV_SELECT: case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: case IVHD_DEV_EXT_SELECT: /* all the above subfield types refer to device ids */ - update_last_devid(dev->devid); + if (dev->devid > last_devid) + last_devid = dev->devid; break; default: break; @@ -594,7 +552,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) WARN_ON(p != end); - return 0; + return last_devid; } static int __init check_ivrs_checksum(struct acpi_table_header *table) @@ -618,38 +576,125 @@ static int __init check_ivrs_checksum(struct acpi_table_header *table) * id which we need to handle. This is the first of three functions which parse * the ACPI table. So we check the checksum here. */ -static int __init find_last_devid_acpi(struct acpi_table_header *table) +static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg) { u8 *p = (u8 *)table, *end = (u8 *)table; struct ivhd_header *h; + int last_devid, last_bdf = 0; p += IVRS_HEADER_LENGTH; end += table->length; while (p < end) { h = (struct ivhd_header *)p; - if (h->type == amd_iommu_target_ivhd_type) { - int ret = find_last_devid_from_ivhd(h); - - if (ret) - return ret; + if (h->pci_seg == pci_seg && + h->type == amd_iommu_target_ivhd_type) { + last_devid = find_last_devid_from_ivhd(h); + + if (last_devid < 0) + return -EINVAL; + if (last_devid > last_bdf) + last_bdf = last_devid; } p += h->length; } WARN_ON(p != end); - return 0; + return last_bdf; } /**************************************************************************** * * The following functions belong to the code path which parses the ACPI table * the second time. In this ACPI parsing iteration we allocate IOMMU specific - * data structures, initialize the device/alias/rlookup table and also - * basically initialize the hardware. + * data structures, initialize the per PCI segment device/alias/rlookup table + * and also basically initialize the hardware. * ****************************************************************************/ +/* Allocate per PCI segment device table */ +static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO | GFP_DMA32, + get_order(pci_seg->dev_table_size)); + if (!pci_seg->dev_table) + return -ENOMEM; + + return 0; +} + +static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->dev_table, + get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = NULL; +} + +/* Allocate per PCI segment IOMMU rlookup table. */ +static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(pci_seg->rlookup_table_size)); + if (pci_seg->rlookup_table == NULL) + return -ENOMEM; + + return 0; +} + +static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->rlookup_table, + get_order(pci_seg->rlookup_table_size)); + pci_seg->rlookup_table = NULL; +} + +static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + pci_seg->irq_lookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, + get_order(pci_seg->rlookup_table_size)); + kmemleak_alloc(pci_seg->irq_lookup_table, + pci_seg->rlookup_table_size, 1, GFP_KERNEL); + if (pci_seg->irq_lookup_table == NULL) + return -ENOMEM; + + return 0; +} + +static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) +{ + kmemleak_free(pci_seg->irq_lookup_table); + free_pages((unsigned long)pci_seg->irq_lookup_table, + get_order(pci_seg->rlookup_table_size)); + pci_seg->irq_lookup_table = NULL; +} + +static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) +{ + int i; + + pci_seg->alias_table = (void *)__get_free_pages(GFP_KERNEL, + get_order(pci_seg->alias_table_size)); + if (!pci_seg->alias_table) + return -ENOMEM; + + /* + * let all alias entries point to itself + */ + for (i = 0; i <= pci_seg->last_bdf; ++i) + pci_seg->alias_table[i] = i; + + return 0; +} + +static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) +{ + free_pages((unsigned long)pci_seg->alias_table, + get_order(pci_seg->alias_table_size)); + pci_seg->alias_table = NULL; +} + /* * Allocates the command buffer. This buffer is per AMD IOMMU. We can * write commands to that buffer later and the IOMMU will execute them @@ -944,56 +989,59 @@ static void iommu_enable_gt(struct amd_iommu *iommu) } /* sets a specific bit in the device table entry. */ -static void set_dev_entry_bit(u16 devid, u8 bit) +static void __set_dev_entry_bit(struct dev_table_entry *dev_table, + u16 devid, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - amd_iommu_dev_table[devid].data[i] |= (1UL << _bit); + dev_table[devid].data[i] |= (1UL << _bit); } -static int get_dev_entry_bit(u16 devid, u8 bit) +static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) +{ + struct dev_table_entry *dev_table = get_dev_table(iommu); + + return __set_dev_entry_bit(dev_table, devid, bit); +} + +static int __get_dev_entry_bit(struct dev_table_entry *dev_table, + u16 devid, u8 bit) { int i = (bit >> 6) & 0x03; int _bit = bit & 0x3f; - return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit; + return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit; } +static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit) +{ + struct dev_table_entry *dev_table = get_dev_table(iommu); -static bool copy_device_table(void) + return __get_dev_entry_bit(dev_table, devid, bit); +} + +static bool __copy_device_table(struct amd_iommu *iommu) { - u64 int_ctl, int_tab_len, entry = 0, last_entry = 0; + u64 int_ctl, int_tab_len, entry = 0; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; struct dev_table_entry *old_devtb = NULL; u32 lo, hi, devid, old_devtb_size; phys_addr_t old_devtb_phys; - struct amd_iommu *iommu; u16 dom_id, dte_v, irq_v; gfp_t gfp_flag; u64 tmp; - if (!amd_iommu_pre_enabled) - return false; - - pr_warn("Translation is already enabled - trying to copy translation structures\n"); - for_each_iommu(iommu) { - /* All IOMMUs should use the same device table with the same size */ - lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); - hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); - entry = (((u64) hi) << 32) + lo; - if (last_entry && last_entry != entry) { - pr_err("IOMMU:%d should use the same dev table as others!\n", - iommu->index); - return false; - } - last_entry = entry; + /* Each IOMMU use separate device table with the same size */ + lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); + hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); + entry = (((u64) hi) << 32) + lo; - old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; - if (old_devtb_size != dev_table_size) { - pr_err("The device table size of IOMMU:%d is not expected!\n", - iommu->index); - return false; - } + old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; + if (old_devtb_size != pci_seg->dev_table_size) { + pr_err("The device table size of IOMMU:%d is not expected!\n", + iommu->index); + return false; } /* @@ -1009,37 +1057,37 @@ static bool copy_device_table(void) } old_devtb = (sme_active() && is_kdump_kernel()) ? (__force void *)ioremap_encrypted(old_devtb_phys, - dev_table_size) - : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); + pci_seg->dev_table_size) + : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB); if (!old_devtb) return false; gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32; - old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, - get_order(dev_table_size)); - if (old_dev_tbl_cpy == NULL) { + pci_seg->old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, + get_order(pci_seg->dev_table_size)); + if (pci_seg->old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); return false; } - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - old_dev_tbl_cpy[devid] = old_devtb[devid]; + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { + pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid]; dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; if (dte_v && dom_id) { - old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; - old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; + pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; + pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); /* If gcr3 table existed, mask it out */ if (old_devtb[devid].data[0] & DTE_FLAG_GV) { tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; - old_dev_tbl_cpy[devid].data[1] &= ~tmp; + pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp; tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; tmp |= DTE_FLAG_GV; - old_dev_tbl_cpy[devid].data[0] &= ~tmp; + pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp; } } @@ -1053,7 +1101,7 @@ static bool copy_device_table(void) return false; } - old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; + pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; } } memunmap(old_devtb); @@ -1061,21 +1109,42 @@ static bool copy_device_table(void) return true; } -void amd_iommu_apply_erratum_63(u16 devid) +static bool copy_device_table(void) { - int sysmgt; + struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; - sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); + if (!amd_iommu_pre_enabled) + return false; - if (sysmgt == 0x01) - set_dev_entry_bit(devid, DEV_ENTRY_IW); + pr_warn("Translation is already enabled - trying to copy translation structures\n"); + + /* + * All IOMMUs within PCI segment shares common device table. + * Hence copy device table only once per PCI segment. + */ + for_each_pci_segment(pci_seg) { + for_each_iommu(iommu) { + if (pci_seg->id != iommu->pci_seg->id) + continue; + if (!__copy_device_table(iommu)) + return false; + break; + } + } + + return true; } -/* Writes the specific IOMMU for a device into the rlookup table */ -static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid) { - amd_iommu_rlookup_table[devid] = iommu; + int sysmgt; + + sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) | + (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1); + + if (sysmgt == 0x01) + set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW); } /* @@ -1086,26 +1155,26 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, u16 devid, u32 flags, u32 ext_flags) { if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS); if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS); if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS); if (flags & ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1); if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2); if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS); if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); + set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS); - amd_iommu_apply_erratum_63(devid); + amd_iommu_apply_erratum_63(iommu, devid); - set_iommu_for_device(iommu, devid); + amd_iommu_set_rlookup_table(iommu, devid); } -int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) +int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line) { struct devid_map *entry; struct list_head *list; @@ -1142,7 +1211,7 @@ int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) return 0; } -static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid, +static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid, bool cmd_line) { struct acpihid_map_entry *entry; @@ -1221,10 +1290,11 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, { u8 *p = (u8 *)h; u8 *end = p, flags = 0; - u16 devid = 0, devid_start = 0, devid_to = 0; + u16 devid = 0, devid_start = 0, devid_to = 0, seg_id; u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; u32 ivhd_size; int ret; @@ -1256,19 +1326,21 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, while (p < end) { e = (struct ivhd_entry *)p; + seg_id = pci_seg->id; + switch (e->type) { case IVHD_DEV_ALL: DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags); - for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i) + for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i) set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: - DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x " "flags: %02x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags); @@ -1279,8 +1351,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_SELECT_RANGE_START: DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %02x:%02x.%x flags: %02x\n", - PCI_BUS_NUM(e->devid), + "devid: %04x:%02x:%02x.%x flags: %02x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags); @@ -1292,9 +1364,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_ALIAS: - DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x " "flags: %02x devid_to: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, @@ -1306,18 +1378,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, devid_to = e->ext >> 8; set_dev_entry_from_acpi(iommu, devid , e->flags, 0); set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); - amd_iommu_alias_table[devid] = devid_to; + pci_seg->alias_table[devid] = devid_to; break; case IVHD_DEV_ALIAS_RANGE: DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %02x:%02x.%x flags: %02x " - "devid_to: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + "devid: %04x:%02x:%02x.%x flags: %02x " + "devid_to: %04x:%02x:%02x.%x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, - PCI_BUS_NUM(e->ext >> 8), + seg_id, PCI_BUS_NUM(e->ext >> 8), PCI_SLOT(e->ext >> 8), PCI_FUNC(e->ext >> 8)); @@ -1329,9 +1401,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT: - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " + DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x " "flags: %02x ext: %08x\n", - PCI_BUS_NUM(e->devid), + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, e->ext); @@ -1343,8 +1415,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_EXT_SELECT_RANGE: DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%02x:%02x.%x flags: %02x ext: %08x\n", - PCI_BUS_NUM(e->devid), + "%04x:%02x:%02x.%x flags: %02x ext: %08x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid), e->flags, e->ext); @@ -1356,15 +1428,15 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_RANGE_END: - DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", - PCI_BUS_NUM(e->devid), + DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n", + seg_id, PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { if (alias) { - amd_iommu_alias_table[dev_i] = devid_to; + pci_seg->alias_table[dev_i] = devid_to; set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags); } @@ -1375,11 +1447,11 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_SPECIAL: { u8 handle, type; const char *var; - u16 devid; + u32 devid; int ret; handle = e->ext & 0xff; - devid = (e->ext >> 8) & 0xffff; + devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8)); type = (e->ext >> 24) & 0xff; if (type == IVHD_SPECIAL_IOAPIC) @@ -1389,9 +1461,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, else var = "UNKNOWN"; - DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n", + DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n", var, (int)handle, - PCI_BUS_NUM(devid), + seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); @@ -1409,7 +1481,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } case IVHD_DEV_ACPI_HID: { - u16 devid; + u32 devid; u8 hid[ACPIHID_HID_LEN]; u8 uid[ACPIHID_UID_LEN]; int ret; @@ -1451,9 +1523,9 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } - devid = e->devid; - DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n", - hid, uid, + devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid); + DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n", + hid, uid, seg_id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); @@ -1483,6 +1555,74 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, return 0; } +/* Allocate PCI segment data structure */ +static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, + struct acpi_table_header *ivrs_base) +{ + struct amd_iommu_pci_seg *pci_seg; + int last_bdf; + + /* + * First parse ACPI tables to find the largest Bus/Dev/Func we need to + * handle in this PCI segment. Upon this information the shared data + * structures for the PCI segments in the system will be allocated. + */ + last_bdf = find_last_devid_acpi(ivrs_base, id); + if (last_bdf < 0) + return NULL; + + pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL); + if (pci_seg == NULL) + return NULL; + + pci_seg->last_bdf = last_bdf; + DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); + pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); + + pci_seg->id = id; + init_llist_head(&pci_seg->dev_data_list); + INIT_LIST_HEAD(&pci_seg->unity_map); + list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list); + + if (alloc_dev_table(pci_seg)) + return NULL; + if (alloc_alias_table(pci_seg)) + return NULL; + if (alloc_rlookup_table(pci_seg)) + return NULL; + + return pci_seg; +} + +static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id, + struct acpi_table_header *ivrs_base) +{ + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) { + if (pci_seg->id == id) + return pci_seg; + } + + return alloc_pci_segment(id, ivrs_base); +} + +static void __init free_pci_segments(void) +{ + struct amd_iommu_pci_seg *pci_seg, *next; + + for_each_pci_segment_safe(pci_seg, next) { + list_del(&pci_seg->list); + free_irq_lookup_table(pci_seg); + free_rlookup_table(pci_seg); + free_alias_table(pci_seg); + free_dev_table(pci_seg); + kfree(pci_seg); + } +} + static void __init free_iommu_one(struct amd_iommu *iommu) { free_cwwb_sem(iommu); @@ -1567,10 +1707,17 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) * together and also allocates the command buffer and programs the * hardware. It does NOT enable the IOMMU. This is done afterwards. */ -static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) +static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, + struct acpi_table_header *ivrs_base) { + struct amd_iommu_pci_seg *pci_seg; int ret; + pci_seg = get_pci_segment(h->pci_seg, ivrs_base); + if (pci_seg == NULL) + return -ENOMEM; + iommu->pci_seg = pci_seg; + raw_spin_lock_init(&iommu->lock); atomic64_set(&iommu->cmd_sem_val, 0); @@ -1591,7 +1738,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ iommu->devid = h->devid; iommu->cap_ptr = h->cap_ptr; - iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; switch (h->type) { @@ -1681,7 +1827,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) * Make sure IOMMU is not considered to translate itself. The IVRS * table tells us so, but this is a lie! */ - amd_iommu_rlookup_table[iommu->devid] = NULL; + pci_seg->rlookup_table[iommu->devid] = NULL; return 0; } @@ -1730,11 +1876,11 @@ static int __init init_iommu_all(struct acpi_table_header *table) h = (struct ivhd_header *)p; if (*p == amd_iommu_target_ivhd_type) { - DUMP_printk("device: %02x:%02x.%01x cap: %04x " - "seg: %d flags: %01x info %04x\n", - PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), - PCI_FUNC(h->devid), h->cap_ptr, - h->pci_seg, h->flags, h->info); + DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x " + "flags: %01x info %04x\n", + h->pci_seg, PCI_BUS_NUM(h->devid), + PCI_SLOT(h->devid), PCI_FUNC(h->devid), + h->cap_ptr, h->flags, h->info); DUMP_printk(" mmio-addr: %016llx\n", h->mmio_phys); @@ -1742,7 +1888,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) if (iommu == NULL) return -ENOMEM; - ret = init_iommu_one(iommu, h); + ret = init_iommu_one(iommu, h, table); if (ret) return ret; } @@ -1841,7 +1987,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) int cap_ptr = iommu->cap_ptr; int ret; - iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid), + iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, + PCI_BUS_NUM(iommu->devid), iommu->devid & 0xff); if (!iommu->dev) return -ENODEV; @@ -1904,7 +2051,8 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) int i, j; iommu->root_pdev = - pci_get_domain_bus_and_slot(0, iommu->dev->bus->number, + pci_get_domain_bus_and_slot(iommu->pci_seg->id, + iommu->dev->bus->number, PCI_DEVFN(0, 0)); /* @@ -1978,6 +2126,7 @@ static void print_iommu_info(void) static int __init amd_iommu_init_pci(void) { struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; int ret; for_each_iommu(iommu) { @@ -2001,7 +2150,8 @@ static int __init amd_iommu_init_pci(void) */ ret = amd_iommu_init_api(); - init_device_table_dma(); + for_each_pci_segment(pci_seg) + init_device_table_dma(pci_seg); for_each_iommu(iommu) iommu_flush_all_caches(iommu); @@ -2267,19 +2417,28 @@ static int iommu_init_irq(struct amd_iommu *iommu) static void __init free_unity_maps(void) { struct unity_map_entry *entry, *next; + struct amd_iommu_pci_seg *p, *pci_seg; - list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { - list_del(&entry->list); - kfree(entry); + for_each_pci_segment_safe(pci_seg, p) { + list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) { + list_del(&entry->list); + kfree(entry); + } } } /* called for unity map ACPI definition */ -static int __init init_unity_map_range(struct ivmd_header *m) +static int __init init_unity_map_range(struct ivmd_header *m, + struct acpi_table_header *ivrs_base) { struct unity_map_entry *e = NULL; + struct amd_iommu_pci_seg *pci_seg; char *s; + pci_seg = get_pci_segment(m->pci_seg, ivrs_base); + if (pci_seg == NULL) + return -ENOMEM; + e = kzalloc(sizeof(*e), GFP_KERNEL); if (e == NULL) return -ENOMEM; @@ -2295,7 +2454,7 @@ static int __init init_unity_map_range(struct ivmd_header *m) case ACPI_IVMD_TYPE_ALL: s = "IVMD_TYPE_ALL\t\t"; e->devid_start = 0; - e->devid_end = amd_iommu_last_bdf; + e->devid_end = pci_seg->last_bdf; break; case ACPI_IVMD_TYPE_RANGE: s = "IVMD_TYPE_RANGE\t\t"; @@ -2317,14 +2476,16 @@ static int __init init_unity_map_range(struct ivmd_header *m) if (m->flags & IVMD_FLAG_EXCL_RANGE) e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; - DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" - " range_start: %016llx range_end: %016llx flags: %x\n", s, + DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: " + "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx" + " flags: %x\n", s, m->pci_seg, PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), - PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end), + PCI_FUNC(e->devid_start), m->pci_seg, + PCI_BUS_NUM(e->devid_end), PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), e->address_start, e->address_end, m->flags); - list_add_tail(&e->list, &amd_iommu_unity_map); + list_add_tail(&e->list, &pci_seg->unity_map); return 0; } @@ -2341,7 +2502,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) while (p < end) { m = (struct ivmd_header *)p; if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) - init_unity_map_range(m); + init_unity_map_range(m, table); p += m->length; } @@ -2352,35 +2513,47 @@ static int __init init_memory_definitions(struct acpi_table_header *table) /* * Init the device table to not allow DMA access for devices */ -static void init_device_table_dma(void) +static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) { u32 devid; + struct dev_table_entry *dev_table = pci_seg->dev_table; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - set_dev_entry_bit(devid, DEV_ENTRY_VALID); - set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); + if (dev_table == NULL) + return; + + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { + __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID); + __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION); } } -static void __init uninit_device_table_dma(void) +static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg) { u32 devid; + struct dev_table_entry *dev_table = pci_seg->dev_table; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - amd_iommu_dev_table[devid].data[0] = 0ULL; - amd_iommu_dev_table[devid].data[1] = 0ULL; + if (dev_table == NULL) + return; + + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { + dev_table[devid].data[0] = 0ULL; + dev_table[devid].data[1] = 0ULL; } } static void init_device_table(void) { + struct amd_iommu_pci_seg *pci_seg; u32 devid; if (!amd_iommu_irq_remap) return; - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) - set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN); + for_each_pci_segment(pci_seg) { + for (devid = 0; devid <= pci_seg->last_bdf; ++devid) + __set_dev_entry_bit(pci_seg->dev_table, + devid, DEV_ENTRY_IRQ_TBL_EN); + } } static void iommu_init_flags(struct amd_iommu *iommu) @@ -2524,7 +2697,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) static void early_enable_iommus(void) { struct amd_iommu *iommu; - + struct amd_iommu_pci_seg *pci_seg; if (!copy_device_table()) { /* @@ -2534,9 +2707,14 @@ static void early_enable_iommus(void) */ if (amd_iommu_pre_enabled) pr_err("Failed to copy DEV table from previous kernel.\n"); - if (old_dev_tbl_cpy != NULL) - free_pages((unsigned long)old_dev_tbl_cpy, - get_order(dev_table_size)); + + for_each_pci_segment(pci_seg) { + if (pci_seg->old_dev_tbl_cpy != NULL) { + free_pages((unsigned long)pci_seg->old_dev_tbl_cpy, + get_order(pci_seg->dev_table_size)); + pci_seg->old_dev_tbl_cpy = NULL; + } + } for_each_iommu(iommu) { clear_translation_pre_enabled(iommu); @@ -2544,9 +2722,13 @@ static void early_enable_iommus(void) } } else { pr_info("Copied DEV table from previous kernel.\n"); - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - amd_iommu_dev_table = old_dev_tbl_cpy; + + for_each_pci_segment(pci_seg) { + free_pages((unsigned long)pci_seg->dev_table, + get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; + } + for_each_iommu(iommu) { iommu_disable_command_buffer(iommu); iommu_disable_event_buffer(iommu); @@ -2638,27 +2820,11 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_iommu_resources(void) { - kmemleak_free(irq_lookup_table); - free_pages((unsigned long)irq_lookup_table, - get_order(rlookup_table_size)); - irq_lookup_table = NULL; - kmem_cache_destroy(amd_iommu_irq_cache); amd_iommu_irq_cache = NULL; - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); - amd_iommu_rlookup_table = NULL; - - free_pages((unsigned long)amd_iommu_alias_table, - get_order(alias_table_size)); - amd_iommu_alias_table = NULL; - - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - amd_iommu_dev_table = NULL; - free_iommu_all(); + free_pci_segments(); } /* SB IOAPIC is always on this device in AMD systems */ @@ -2757,7 +2923,7 @@ static void __init ivinfo_init(void *ivrs) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; - int i, remap_cache_sz, ret; + int remap_cache_sz, ret; acpi_status status; if (!amd_iommu_detected) @@ -2785,42 +2951,8 @@ static int __init early_amd_iommu_init(void) amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); - /* - * First parse ACPI tables to find the largest Bus/Dev/Func - * we need to handle. Upon this information the shared data - * structures for the IOMMUs in the system will be allocated - */ - ret = find_last_devid_acpi(ivrs_base); - if (ret) - goto out; - - dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); - alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); - rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); - /* Device table - directly used by all IOMMUs */ ret = -ENOMEM; - amd_iommu_dev_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO | GFP_DMA32, - get_order(dev_table_size)); - if (amd_iommu_dev_table == NULL) - goto out; - - /* - * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the - * IOMMU see for that device - */ - amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(alias_table_size)); - if (amd_iommu_alias_table == NULL) - goto out; - - /* IOMMU rlookup table - find the IOMMU for a specific device */ - amd_iommu_rlookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_rlookup_table == NULL) - goto out; amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, @@ -2828,12 +2960,6 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_pd_alloc_bitmap == NULL) goto out; - /* - * let all alias entries point to itself - */ - for (i = 0; i <= amd_iommu_last_bdf; ++i) - amd_iommu_alias_table[i] = i; - /* * never allocate domain 0 because its used as the non-allocated and * error value placeholder @@ -2856,6 +2982,7 @@ static int __init early_amd_iommu_init(void) amd_iommu_irq_remap = check_ioapic_information(); if (amd_iommu_irq_remap) { + struct amd_iommu_pci_seg *pci_seg; /* * Interrupt remapping enabled, create kmem_cache for the * remapping tables. @@ -2872,13 +2999,10 @@ static int __init early_amd_iommu_init(void) if (!amd_iommu_irq_cache) goto out; - irq_lookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - kmemleak_alloc(irq_lookup_table, rlookup_table_size, - 1, GFP_KERNEL); - if (!irq_lookup_table) - goto out; + for_each_pci_segment(pci_seg) { + if (alloc_irq_lookup_table(pci_seg)) + goto out; + } } ret = init_memory_definitions(ivrs_base); @@ -3015,8 +3139,11 @@ static int __init state_next(void) free_iommu_resources(); } else { struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) + uninit_device_table_dma(pci_seg); - uninit_device_table_dma(); for_each_iommu(iommu) iommu_flush_all_caches(iommu); } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index ea44e8c2412a6..7f07631ee5f14 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -61,9 +61,6 @@ static DEFINE_SPINLOCK(pd_bitmap_lock); -/* List of all available dev_data structures */ -static LLIST_HEAD(dev_data_list); - LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); LIST_HEAD(acpihid_map); @@ -94,13 +91,6 @@ static void detach_device(struct device *dev); * ****************************************************************************/ -static inline u16 get_pci_device_id(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - return pci_dev_id(pdev); -} - static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { @@ -121,16 +111,74 @@ static inline int get_acpihid_device_id(struct device *dev, return -EINVAL; } -static inline int get_device_id(struct device *dev) +static inline int get_device_sbdf_id(struct device *dev) { - int devid; + int sbdf; if (dev_is_pci(dev)) - devid = get_pci_device_id(dev); + sbdf = get_pci_sbdf_id(to_pci_dev(dev)); else - devid = get_acpihid_device_id(dev, NULL); + sbdf = get_acpihid_device_id(dev, NULL); + + return sbdf; +} + +struct dev_table_entry *get_dev_table(struct amd_iommu *iommu) +{ + struct dev_table_entry *dev_table; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + BUG_ON(pci_seg == NULL); + dev_table = pci_seg->dev_table; + BUG_ON(dev_table == NULL); + + return dev_table; +} + +static inline u16 get_device_segment(struct device *dev) +{ + u16 seg; - return devid; + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + seg = pci_domain_nr(pdev->bus); + } else { + u32 devid = get_acpihid_device_id(dev, NULL); + + seg = PCI_SBDF_TO_SEGID(devid); + } + + return seg; +} + +/* Writes the specific IOMMU for a device into the PCI segment rlookup table */ +void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid) +{ + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + pci_seg->rlookup_table[devid] = iommu; +} + +static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid) +{ + struct amd_iommu_pci_seg *pci_seg; + + for_each_pci_segment(pci_seg) { + if (pci_seg->id == seg) + return pci_seg->rlookup_table[devid]; + } + return NULL; +} + +static struct amd_iommu *rlookup_amd_iommu(struct device *dev) +{ + u16 seg = get_device_segment(dev); + int devid = get_device_sbdf_id(dev); + + if (devid < 0) + return NULL; + return __rlookup_amd_iommu(seg, PCI_SBDF_TO_DEVID(devid)); } static struct protection_domain *to_pdomain(struct iommu_domain *dom) @@ -138,9 +186,10 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } -static struct iommu_dev_data *alloc_dev_data(u16 devid) +static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); if (!dev_data) @@ -150,19 +199,20 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); - llist_add(&dev_data->dev_data_list, &dev_data_list); + llist_add(&dev_data->dev_data_list, &pci_seg->dev_data_list); return dev_data; } -static struct iommu_dev_data *search_dev_data(u16 devid) +static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; struct llist_node *node; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; - if (llist_empty(&dev_data_list)) + if (llist_empty(&pci_seg->dev_data_list)) return NULL; - node = dev_data_list.first; + node = pci_seg->dev_data_list.first; llist_for_each_entry(dev_data, node, dev_data_list) { if (dev_data->devid == devid) return dev_data; @@ -173,67 +223,74 @@ static struct iommu_dev_data *search_dev_data(u16 devid) static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) { + struct amd_iommu *iommu; + struct dev_table_entry *dev_table; u16 devid = pci_dev_id(pdev); if (devid == alias) return 0; - amd_iommu_rlookup_table[alias] = - amd_iommu_rlookup_table[devid]; - memcpy(amd_iommu_dev_table[alias].data, - amd_iommu_dev_table[devid].data, - sizeof(amd_iommu_dev_table[alias].data)); + iommu = rlookup_amd_iommu(&pdev->dev); + if (!iommu) + return 0; + + amd_iommu_set_rlookup_table(iommu, alias); + dev_table = get_dev_table(iommu); + memcpy(dev_table[alias].data, + dev_table[devid].data, + sizeof(dev_table[alias].data)); return 0; } -static void clone_aliases(struct pci_dev *pdev) +static void clone_aliases(struct amd_iommu *iommu, struct device *dev) { - if (!pdev) + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) return; + pdev = to_pci_dev(dev); /* * The IVRS alias stored in the alias table may not be * part of the PCI DMA aliases if it's bus differs * from the original device. */ - clone_alias(pdev, amd_iommu_alias_table[pci_dev_id(pdev)], NULL); + clone_alias(pdev, iommu->pci_seg->alias_table[pci_dev_id(pdev)], NULL); pci_for_each_dma_alias(pdev, clone_alias, NULL); } -static struct pci_dev *setup_aliases(struct device *dev) +static void setup_aliases(struct amd_iommu *iommu, struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; u16 ivrs_alias; /* For ACPI HID devices, there are no aliases */ if (!dev_is_pci(dev)) - return NULL; + return; /* * Add the IVRS alias to the pci aliases if it is on the same * bus. The IVRS table may know about a quirk that we don't. */ - ivrs_alias = amd_iommu_alias_table[pci_dev_id(pdev)]; + ivrs_alias = pci_seg->alias_table[pci_dev_id(pdev)]; if (ivrs_alias != pci_dev_id(pdev) && PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1); - clone_aliases(pdev); - - return pdev; + clone_aliases(iommu, dev); } -static struct iommu_dev_data *find_dev_data(u16 devid) +static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid) { struct iommu_dev_data *dev_data; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - dev_data = search_dev_data(devid); + dev_data = search_dev_data(iommu, devid); if (dev_data == NULL) { - dev_data = alloc_dev_data(devid); + dev_data = alloc_dev_data(iommu, devid); if (!dev_data) return NULL; @@ -295,42 +352,49 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev) */ static bool check_device(struct device *dev) { - int devid; + struct amd_iommu_pci_seg *pci_seg; + struct amd_iommu *iommu; + int devid, sbdf; if (!dev) return false; - devid = get_device_id(dev); - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) return false; + devid = PCI_SBDF_TO_DEVID(sbdf); - /* Out of our scope? */ - if (devid > amd_iommu_last_bdf) + iommu = rlookup_amd_iommu(dev); + if (!iommu) return false; - if (amd_iommu_rlookup_table[devid] == NULL) + /* Out of our scope? */ + pci_seg = iommu->pci_seg; + if (devid > pci_seg->last_bdf) return false; return true; } -static int iommu_init_device(struct device *dev) +static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) { struct iommu_dev_data *dev_data; - int devid; + int devid, sbdf; if (dev_iommu_priv_get(dev)) return 0; - devid = get_device_id(dev); - if (devid < 0) - return devid; + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) + return sbdf; - dev_data = find_dev_data(devid); + devid = PCI_SBDF_TO_DEVID(sbdf); + dev_data = find_dev_data(iommu, devid); if (!dev_data) return -ENOMEM; - dev_data->pdev = setup_aliases(dev); + dev_data->dev = dev; + setup_aliases(iommu, dev); /* * By default we use passthrough mode for IOMMUv2 capable device. @@ -340,9 +404,6 @@ static int iommu_init_device(struct device *dev) */ if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { - struct amd_iommu *iommu; - - iommu = amd_iommu_rlookup_table[dev_data->devid]; dev_data->iommu_v2 = iommu->is_iommu_v2; } @@ -351,18 +412,21 @@ static int iommu_init_device(struct device *dev) return 0; } -static void iommu_ignore_device(struct device *dev) +static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev) { - int devid; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + struct dev_table_entry *dev_table = get_dev_table(iommu); + int devid, sbdf; - devid = get_device_id(dev); - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) return; - amd_iommu_rlookup_table[devid] = NULL; - memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); + devid = PCI_SBDF_TO_DEVID(sbdf); + pci_seg->rlookup_table[devid] = NULL; + memset(&dev_table[devid], 0, sizeof(struct dev_table_entry)); - setup_aliases(dev); + setup_aliases(iommu, dev); } static void amd_iommu_uninit_device(struct device *dev) @@ -390,13 +454,13 @@ static void amd_iommu_uninit_device(struct device *dev) * ****************************************************************************/ -static void dump_dte_entry(u16 devid) +static void dump_dte_entry(struct amd_iommu *iommu, u16 devid) { int i; + struct dev_table_entry *dev_table = get_dev_table(iommu); for (i = 0; i < 4; ++i) - pr_err("DTE[%d]: %016llx\n", i, - amd_iommu_dev_table[devid].data[i]); + pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]); } static void dump_command(unsigned long phys_addr) @@ -408,7 +472,7 @@ static void dump_command(unsigned long phys_addr) pr_err("CMD[%d]: %08x\n", i, cmd->data[i]); } -static void amd_iommu_report_rmp_hw_error(volatile u32 *event) +static void amd_iommu_report_rmp_hw_error(struct amd_iommu *iommu, volatile u32 *event) { struct iommu_dev_data *dev_data = NULL; int devid, vmg_tag, flags; @@ -420,7 +484,7 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; spa = ((u64)event[3] << 32) | (event[2] & 0xFFFFFFF8); - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -431,8 +495,8 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) vmg_tag, spa, flags); } } else { - pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), vmg_tag, spa, flags); } @@ -440,7 +504,7 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) pci_dev_put(pdev); } -static void amd_iommu_report_rmp_fault(volatile u32 *event) +static void amd_iommu_report_rmp_fault(struct amd_iommu *iommu, volatile u32 *event) { struct iommu_dev_data *dev_data = NULL; int devid, flags_rmp, vmg_tag, flags; @@ -453,7 +517,7 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; gpa = ((u64)event[3] << 32) | event[2]; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -464,8 +528,8 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) vmg_tag, gpa, flags_rmp, flags); } } else { - pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%04x:%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), vmg_tag, gpa, flags_rmp, flags); } @@ -473,13 +537,14 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) pci_dev_put(pdev); } -static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, +static void amd_iommu_report_page_fault(struct amd_iommu *iommu, + u16 devid, u16 domain_id, u64 address, int flags) { struct iommu_dev_data *dev_data = NULL; struct pci_dev *pdev; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + pdev = pci_get_domain_bus_and_slot(iommu->pci_seg->id, PCI_BUS_NUM(devid), devid & 0xff); if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); @@ -490,8 +555,8 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, domain_id, address, flags); } } else { - pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%04x:%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domain_id, address, flags); } @@ -527,26 +592,26 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) } if (type == EVENT_TYPE_IO_FAULT) { - amd_iommu_report_page_fault(devid, pasid, address, flags); + amd_iommu_report_page_fault(iommu, devid, pasid, address, flags); return; } switch (type) { case EVENT_TYPE_ILL_DEV: - dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); - dump_dte_entry(devid); + dump_dte_entry(iommu, devid); break; case EVENT_TYPE_DEV_TAB_ERR: - dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + dev_err(dev, "Event logged [DEV_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x " "address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%04x:%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; case EVENT_TYPE_ILL_CMD: @@ -558,26 +623,26 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) address, flags); break; case EVENT_TYPE_IOTLB_INV_TO: - dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%llx]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [IOTLB_INV_TIMEOUT device=%04x:%02x:%02x.%x address=0x%llx]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address); break; case EVENT_TYPE_INV_DEV_REQ: - dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [INVALID_DEVICE_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; case EVENT_TYPE_RMP_FAULT: - amd_iommu_report_rmp_fault(event); + amd_iommu_report_rmp_fault(iommu, event); break; case EVENT_TYPE_RMP_HW_ERR: - amd_iommu_report_rmp_hw_error(event); + amd_iommu_report_rmp_hw_error(iommu, event); break; case EVENT_TYPE_INV_PPR_REQ: pasid = PPR_PASID(*((u64 *)__evt)); tag = event[1] & 0x03FF; - dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n", - PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags, tag); break; default: @@ -614,7 +679,7 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) fault.address = raw[1]; fault.pasid = PPR_PASID(raw[0]); - fault.device_id = PPR_DEVID(raw[0]); + fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0])); fault.tag = PPR_TAG(raw[0]); fault.flags = PPR_FLAGS(raw[0]); @@ -1111,8 +1176,9 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; + u16 last_bdf = iommu->pci_seg->last_bdf; - for (devid = 0; devid <= 0xffff; ++devid) + for (devid = 0; devid <= last_bdf; ++devid) iommu_flush_dte(iommu, devid); iommu_completion_wait(iommu); @@ -1125,8 +1191,9 @@ static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) { u32 dom_id; + u16 last_bdf = iommu->pci_seg->last_bdf; - for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { + for (dom_id = 0; dom_id <= last_bdf; ++dom_id) { struct iommu_cmd cmd; build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, dom_id, 1); @@ -1169,11 +1236,12 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) { u32 devid; + u16 last_bdf = iommu->pci_seg->last_bdf; if (iommu->irtcachedis_enabled) return; - for (devid = 0; devid <= MAX_DEV_TABLE_ENTRIES; devid++) + for (devid = 0; devid <= last_bdf; devid++) iommu_flush_irt(iommu, devid); iommu_completion_wait(iommu); @@ -1227,7 +1295,9 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, int qdep; qdep = dev_data->ats.qdep; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return -EINVAL; build_inv_iotlb_pages(&cmd, dev_data->devid, qdep, address, size); @@ -1247,20 +1317,28 @@ static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data) static int device_flush_dte(struct iommu_dev_data *dev_data) { struct amd_iommu *iommu; + struct pci_dev *pdev = NULL; + struct amd_iommu_pci_seg *pci_seg; u16 alias; int ret; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return -EINVAL; + + if (dev_is_pci(dev_data->dev)) + pdev = to_pci_dev(dev_data->dev); - if (dev_data->pdev) - ret = pci_for_each_dma_alias(dev_data->pdev, + if (pdev) + ret = pci_for_each_dma_alias(pdev, device_flush_dte_alias, iommu); else ret = iommu_flush_dte(iommu, dev_data->devid); if (ret) return ret; - alias = amd_iommu_alias_table[dev_data->devid]; + pci_seg = iommu->pci_seg; + alias = pci_seg->alias_table[dev_data->devid]; if (alias != dev_data->devid) { ret = iommu_flush_dte(iommu, alias); if (ret) @@ -1476,12 +1554,13 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -static void set_dte_entry(u16 devid, struct protection_domain *domain, - bool ats, bool ppr) +static void set_dte_entry(struct amd_iommu *iommu, u16 devid, + struct protection_domain *domain, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; + struct dev_table_entry *dev_table = get_dev_table(iommu); if (domain->iop.mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->iop.root); @@ -1490,14 +1569,12 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; - flags = amd_iommu_dev_table[devid].data[1]; + flags = dev_table[devid].data[1]; if (ats) flags |= DTE_FLAG_IOTLB; if (ppr) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - if (iommu_feature(iommu, FEATURE_EPHSUP)) pte_root |= 1ULL << DEV_ENTRY_PPR; } @@ -1531,9 +1608,9 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, flags &= ~DEV_DOMID_MASK; flags |= domain->id; - old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK; - amd_iommu_dev_table[devid].data[1] = flags; - amd_iommu_dev_table[devid].data[0] = pte_root; + old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK; + dev_table[devid].data[1] = flags; + dev_table[devid].data[0] = pte_root; /* * A kdump kernel might be replacing a domain ID that was copied from @@ -1541,19 +1618,19 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, * entries for the old domain ID that is being overwritten */ if (old_domid) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - amd_iommu_flush_tlb_domid(iommu, old_domid); } } -static void clear_dte_entry(u16 devid) +static void clear_dte_entry(struct amd_iommu *iommu, u16 devid) { + struct dev_table_entry *dev_table = get_dev_table(iommu); + /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; - amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK; + dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; + dev_table[devid].data[1] &= DTE_FLAG_MASK; - amd_iommu_apply_erratum_63(devid); + amd_iommu_apply_erratum_63(iommu, devid); } static void do_attach(struct iommu_dev_data *dev_data, @@ -1562,7 +1639,9 @@ static void do_attach(struct iommu_dev_data *dev_data, struct amd_iommu *iommu; bool ats; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return; ats = dev_data->ats.enabled; /* Update data structures */ @@ -1574,9 +1653,9 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - set_dte_entry(dev_data->devid, domain, + set_dte_entry(iommu, dev_data->devid, domain, ats, dev_data->iommu_v2); - clone_aliases(dev_data->pdev); + clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); } @@ -1586,13 +1665,15 @@ static void do_detach(struct iommu_dev_data *dev_data) struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + return; /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); - clear_dte_entry(dev_data->devid); - clone_aliases(dev_data->pdev); + clear_dte_entry(iommu, dev_data->devid); + clone_aliases(iommu, dev_data->dev); /* Flush the DTE entry */ device_flush_dte(dev_data); @@ -1766,23 +1847,24 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) { struct iommu_device *iommu_dev; struct amd_iommu *iommu; - int ret, devid; + int ret; if (!check_device(dev)) return ERR_PTR(-ENODEV); - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return ERR_PTR(-ENODEV); if (dev_iommu_priv_get(dev)) return &iommu->iommu; - ret = iommu_init_device(dev); + ret = iommu_init_device(iommu, dev); if (ret) { if (ret != -ENOTSUPP) dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_dev = ERR_PTR(ret); - iommu_ignore_device(dev); + iommu_ignore_device(iommu, dev); } else { amd_iommu_set_pci_msi_domain(dev, iommu); iommu_dev = &iommu->iommu; @@ -1802,13 +1884,14 @@ static void amd_iommu_probe_finalize(struct device *dev) static void amd_iommu_release_device(struct device *dev) { - int devid = get_device_id(dev); struct amd_iommu *iommu; if (!check_device(dev)) return; - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return; amd_iommu_uninit_device(dev); iommu_completion_wait(iommu); @@ -1833,9 +1916,13 @@ static void update_device_table(struct protection_domain *domain) struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, + struct amd_iommu *iommu = rlookup_amd_iommu(dev_data->dev); + + if (!iommu) + continue; + set_dte_entry(iommu, dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); - clone_aliases(dev_data->pdev); + clone_aliases(iommu, dev_data->dev); } } @@ -2021,7 +2108,6 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); - int devid = get_device_id(dev); struct amd_iommu *iommu; if (!check_device(dev)) @@ -2030,7 +2116,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, if (dev_data->domain != NULL) detach_device(dev); - iommu = amd_iommu_rlookup_table[devid]; + iommu = rlookup_amd_iommu(dev); if (!iommu) return; @@ -2057,7 +2143,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, dev_data = dev_iommu_priv_get(dev); dev_data->defer_attach = false; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(dev); if (!iommu) return -EINVAL; @@ -2184,13 +2270,21 @@ static void amd_iommu_get_resv_regions(struct device *dev, { struct iommu_resv_region *region; struct unity_map_entry *entry; - int devid; + struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; + int devid, sbdf; - devid = get_device_id(dev); - if (devid < 0) + sbdf = get_device_sbdf_id(dev); + if (sbdf < 0) return; - list_for_each_entry(entry, &amd_iommu_unity_map, list) { + devid = PCI_SBDF_TO_DEVID(sbdf); + iommu = rlookup_amd_iommu(dev); + if (!iommu) + return; + pci_seg = iommu->pci_seg; + + list_for_each_entry(entry, &pci_seg->unity_map, list) { int type, prot = 0; size_t length; @@ -2426,8 +2520,9 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, continue; qdep = dev_data->ats.qdep; - iommu = amd_iommu_rlookup_table[dev_data->devid]; - + iommu = rlookup_amd_iommu(dev_data->dev); + if (!iommu) + continue; build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid, qdep, address, size); @@ -2589,7 +2684,9 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, struct iommu_cmd cmd; dev_data = dev_iommu_priv_get(&pdev->dev); - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = rlookup_amd_iommu(&pdev->dev); + if (!iommu) + return -ENODEV; build_complete_ppr(&cmd, dev_data->devid, pasid, status, tag, dev_data->pri_tlp); @@ -2651,30 +2748,35 @@ EXPORT_SYMBOL(amd_iommu_device_info); static struct irq_chip amd_ir_chip; static DEFINE_SPINLOCK(iommu_table_lock); -static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) +static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, + struct irq_remap_table *table) { u64 dte; + struct dev_table_entry *dev_table = get_dev_table(iommu); - dte = amd_iommu_dev_table[devid].data[2]; + dte = dev_table[devid].data[2]; dte &= ~DTE_IRQ_PHYS_ADDR_MASK; dte |= iommu_virt_to_phys(table->table); dte |= DTE_IRQ_REMAP_INTCTL; dte |= DTE_INTTABLEN; dte |= DTE_IRQ_REMAP_ENABLE; - amd_iommu_dev_table[devid].data[2] = dte; + dev_table[devid].data[2] = dte; } -static struct irq_remap_table *get_irq_table(u16 devid) +static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) { struct irq_remap_table *table; + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; - if (WARN_ONCE(!amd_iommu_rlookup_table[devid], - "%s: no iommu for devid %x\n", __func__, devid)) + if (WARN_ONCE(!pci_seg->rlookup_table[devid], + "%s: no iommu for devid %x:%x\n", + __func__, pci_seg->id, devid)) return NULL; - table = irq_lookup_table[devid]; - if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid)) + table = pci_seg->irq_lookup_table[devid]; + if (WARN_ONCE(!table, "%s: no table for devid %x:%x\n", + __func__, pci_seg->id, devid)) return NULL; return table; @@ -2707,8 +2809,10 @@ static struct irq_remap_table *__alloc_irq_table(void) static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { - irq_lookup_table[devid] = table; - set_dte_irq_entry(devid, table); + struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg; + + pci_seg->irq_lookup_table[devid] = table; + set_dte_irq_entry(iommu, devid, table); iommu_flush_dte(iommu, devid); } @@ -2716,35 +2820,38 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, void *data) { struct irq_remap_table *table = data; + struct amd_iommu_pci_seg *pci_seg; + struct amd_iommu *iommu = rlookup_amd_iommu(&pdev->dev); - irq_lookup_table[alias] = table; - set_dte_irq_entry(alias, table); + if (!iommu) + return -EINVAL; - iommu_flush_dte(amd_iommu_rlookup_table[alias], alias); + pci_seg = iommu->pci_seg; + pci_seg->irq_lookup_table[alias] = table; + set_dte_irq_entry(iommu, alias, table); + iommu_flush_dte(pci_seg->rlookup_table[alias], alias); return 0; } -static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) +static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, + u16 devid, struct pci_dev *pdev) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; - struct amd_iommu *iommu; + struct amd_iommu_pci_seg *pci_seg; unsigned long flags; u16 alias; spin_lock_irqsave(&iommu_table_lock, flags); - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - goto out_unlock; - - table = irq_lookup_table[devid]; + pci_seg = iommu->pci_seg; + table = pci_seg->irq_lookup_table[devid]; if (table) goto out_unlock; - alias = amd_iommu_alias_table[devid]; - table = irq_lookup_table[alias]; + alias = pci_seg->alias_table[devid]; + table = pci_seg->irq_lookup_table[alias]; if (table) { set_remap_table_entry(iommu, devid, table); goto out_wait; @@ -2758,11 +2865,11 @@ static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) spin_lock_irqsave(&iommu_table_lock, flags); - table = irq_lookup_table[devid]; + table = pci_seg->irq_lookup_table[devid]; if (table) goto out_unlock; - table = irq_lookup_table[alias]; + table = pci_seg->irq_lookup_table[alias]; if (table) { set_remap_table_entry(iommu, devid, table); goto out_wait; @@ -2793,18 +2900,14 @@ static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) return table; } -static int alloc_irq_index(u16 devid, int count, bool align, - struct pci_dev *pdev) +static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, + bool align, struct pci_dev *pdev) { struct irq_remap_table *table; int index, c, alignment = 1; unsigned long flags; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - - if (!iommu) - return -ENODEV; - table = alloc_irq_table(devid, pdev); + table = alloc_irq_table(iommu, devid, pdev); if (!table) return -ENODEV; @@ -2843,19 +2946,14 @@ static int alloc_irq_index(u16 devid, int count, bool align, return index; } -static int __modify_irte_ga(u16 devid, int index, struct irte_ga *irte) +static int __modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, struct irte_ga *irte) { bool ret; struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; struct irte_ga *entry; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return -EINVAL; - - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return -ENOMEM; @@ -2880,16 +2978,11 @@ static int __modify_irte_ga(u16 devid, int index, struct irte_ga *irte) return 0; } -static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte) +static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, struct irte_ga *irte) { bool ret; - struct amd_iommu *iommu; - - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return -EINVAL; - ret = __modify_irte_ga(devid, index, irte); + ret = __modify_irte_ga(iommu, devid, index, irte); if (ret) return ret; @@ -2898,17 +2991,13 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte) return 0; } -static int modify_irte(u16 devid, int index, union irte *irte) +static int modify_irte(struct amd_iommu *iommu, + u16 devid, int index, union irte *irte) { struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return -EINVAL; - - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return -ENOMEM; @@ -2921,17 +3010,12 @@ static int modify_irte(u16 devid, int index, union irte *irte) return 0; } -static void free_irte(u16 devid, int index) +static void free_irte(struct amd_iommu *iommu, u16 devid, int index) { struct irq_remap_table *table; - struct amd_iommu *iommu; unsigned long flags; - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - return; - - table = get_irq_table(devid); + table = get_irq_table(iommu, devid); if (!table) return; @@ -2972,49 +3056,49 @@ static void irte_ga_prepare(void *entry, irte->lo.fields_remap.valid = 1; } -static void irte_activate(void *entry, u16 devid, u16 index) +static void irte_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { union irte *irte = (union irte *) entry; irte->fields.valid = 1; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_activate(void *entry, u16 devid, u16 index) +static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { struct irte_ga *irte = (struct irte_ga *) entry; irte->lo.fields_remap.valid = 1; - modify_irte_ga(devid, index, irte); + modify_irte_ga(iommu, devid, index, irte); } -static void irte_deactivate(void *entry, u16 devid, u16 index) +static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { union irte *irte = (union irte *) entry; irte->fields.valid = 0; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_deactivate(void *entry, u16 devid, u16 index) +static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) { struct irte_ga *irte = (struct irte_ga *) entry; irte->lo.fields_remap.valid = 0; - modify_irte_ga(devid, index, irte); + modify_irte_ga(iommu, devid, index, irte); } -static void irte_set_affinity(void *entry, u16 devid, u16 index, +static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, u8 vector, u32 dest_apicid) { union irte *irte = (union irte *) entry; irte->fields.vector = vector; irte->fields.destination = dest_apicid; - modify_irte(devid, index, irte); + modify_irte(iommu, devid, index, irte); } -static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, +static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, u8 vector, u32 dest_apicid) { struct irte_ga *irte = (struct irte_ga *) entry; @@ -3025,7 +3109,7 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index, APICID_TO_IRTE_DEST_LO(dest_apicid); irte->hi.fields.destination = APICID_TO_IRTE_DEST_HI(dest_apicid); - modify_irte_ga(devid, index, irte); + modify_irte_ga(iommu, devid, index, irte); } } @@ -3084,7 +3168,7 @@ static int get_devid(struct irq_alloc_info *info) return get_hpet_devid(info->devid); case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: - return get_device_id(msi_desc_to_dev(info->desc)); + return get_device_sbdf_id(msi_desc_to_dev(info->desc)); default: WARN_ON_ONCE(1); return -1; @@ -3113,7 +3197,7 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data, int devid, int index, int sub_handle) { struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + struct amd_iommu *iommu = data->iommu; if (!iommu) return; @@ -3164,8 +3248,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, struct irq_alloc_info *info = arg; struct irq_data *irq_data; struct amd_ir_data *data = NULL; + struct amd_iommu *iommu; struct irq_cfg *cfg; - int i, ret, devid; + int i, ret, devid, seg, sbdf; int index; if (!info) @@ -3181,8 +3266,14 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; - devid = get_devid(info); - if (devid < 0) + sbdf = get_devid(info); + if (sbdf < 0) + return -EINVAL; + + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + iommu = __rlookup_amd_iommu(seg, devid); + if (!iommu) return -EINVAL; ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); @@ -3191,9 +3282,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { struct irq_remap_table *table; - struct amd_iommu *iommu; - table = alloc_irq_table(devid, NULL); + table = alloc_irq_table(iommu, devid, NULL); if (table) { if (!table->min_index) { /* @@ -3201,7 +3291,6 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, * interrupts. */ table->min_index = 32; - iommu = amd_iommu_rlookup_table[devid]; for (i = 0; i < 32; ++i) iommu->irte_ops->set_allocated(table, i); } @@ -3214,10 +3303,10 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX) { bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI); - index = alloc_irq_index(devid, nr_irqs, align, + index = alloc_irq_index(iommu, devid, nr_irqs, align, msi_desc_to_pci_dev(info->desc)); } else { - index = alloc_irq_index(devid, nr_irqs, false, NULL); + index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL); } if (index < 0) { @@ -3249,6 +3338,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, goto out_free_data; } + data->iommu = iommu; irq_data->hwirq = (devid << 16) + i; irq_data->chip_data = data; irq_data->chip = &amd_ir_chip; @@ -3265,7 +3355,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, kfree(irq_data->chip_data); } for (i = 0; i < nr_irqs; i++) - free_irte(devid, index + i); + free_irte(iommu, devid, index + i); out_free_parent: irq_domain_free_irqs_common(domain, virq, nr_irqs); return ret; @@ -3284,7 +3374,7 @@ static void irq_remapping_free(struct irq_domain *domain, unsigned int virq, if (irq_data && irq_data->chip_data) { data = irq_data->chip_data; irte_info = &data->irq_2_irte; - free_irte(irte_info->devid, irte_info->index); + free_irte(data->iommu, irte_info->devid, irte_info->index); kfree(data->entry); kfree(data); } @@ -3302,13 +3392,13 @@ static int irq_remapping_activate(struct irq_domain *domain, { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = data->iommu; struct irq_cfg *cfg = irqd_cfg(irq_data); if (!iommu) return 0; - iommu->irte_ops->activate(data->entry, irte_info->devid, + iommu->irte_ops->activate(iommu, data->entry, irte_info->devid, irte_info->index); amd_ir_update_irte(irq_data, iommu, data, irte_info, cfg); return 0; @@ -3319,10 +3409,10 @@ static void irq_remapping_deactivate(struct irq_domain *domain, { struct amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = data->iommu; if (iommu) - iommu->irte_ops->deactivate(data->entry, irte_info->devid, + iommu->irte_ops->deactivate(iommu, data->entry, irte_info->devid, irte_info->index); } @@ -3342,8 +3432,8 @@ static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec, if (devid < 0) return 0; + iommu = __rlookup_amd_iommu((devid >> 16), (devid & 0xffff)); - iommu = amd_iommu_rlookup_table[devid]; return iommu && iommu->ir_domain == d; } @@ -3376,7 +3466,7 @@ int amd_iommu_activate_guest_mode(void *data) entry->hi.fields.vector = ir_data->ga_vector; entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; - return modify_irte_ga(ir_data->irq_2_irte.devid, + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry); } EXPORT_SYMBOL(amd_iommu_activate_guest_mode); @@ -3406,7 +3496,7 @@ int amd_iommu_deactivate_guest_mode(void *data) entry->hi.fields.destination = APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); - return modify_irte_ga(ir_data->irq_2_irte.devid, + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry); } EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); @@ -3414,12 +3504,16 @@ EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) { int ret; - struct amd_iommu *iommu; struct amd_iommu_pi_data *pi_data = vcpu_info; struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data; struct amd_ir_data *ir_data = data->chip_data; struct irq_2_irte *irte_info = &ir_data->irq_2_irte; - struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid); + struct iommu_dev_data *dev_data; + + if (ir_data->iommu == NULL) + return -EINVAL; + + dev_data = search_dev_data(ir_data->iommu, irte_info->devid); /* Note: * This device has never been set up for guest mode. @@ -3441,10 +3535,6 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) pi_data->is_guest_mode = false; } - iommu = amd_iommu_rlookup_table[irte_info->devid]; - if (iommu == NULL) - return -EINVAL; - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); @@ -3478,7 +3568,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, * Atomically updates the IRTE with the new destination, vector * and flushes the interrupt entry cache. */ - iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid, + iommu->irte_ops->set_affinity(iommu, ir_data->entry, irte_info->devid, irte_info->index, cfg->vector, cfg->dest_apicid); } @@ -3490,7 +3580,7 @@ static int amd_ir_set_affinity(struct irq_data *data, struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct irq_cfg *cfg = irqd_cfg(data); struct irq_data *parent = data->parent_data; - struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid]; + struct amd_iommu *iommu = ir_data->iommu; int ret; if (!iommu) @@ -3550,14 +3640,13 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) { struct amd_iommu *iommu; struct amd_ir_data *ir_data = (struct amd_ir_data *)data; - int devid = ir_data->irq_2_irte.devid; struct irte_ga *entry = (struct irte_ga *) ir_data->entry; if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry || !entry->lo.fields_vapic.guest_mode) return 0; - iommu = amd_iommu_rlookup_table[devid]; + iommu = ir_data->iommu; if (!iommu) return -ENODEV; @@ -3569,7 +3658,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) } entry->lo.fields_vapic.is_run = is_run; - return __modify_irte_ga(ir_data->irq_2_irte.devid, + return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry); } EXPORT_SYMBOL(amd_iommu_update_ga); diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 29a3a62b7a3ac..865153d7f917c 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -51,7 +51,7 @@ struct pasid_state { struct device_state { struct list_head list; - u16 devid; + u32 sbdf; atomic_t count; struct pci_dev *pdev; struct pasid_state **states; @@ -84,35 +84,25 @@ static struct workqueue_struct *iommu_wq; static void free_pasid_states(struct device_state *dev_state); -static u16 device_id(struct pci_dev *pdev) -{ - u16 devid; - - devid = pdev->bus->number; - devid = (devid << 8) | pdev->devfn; - - return devid; -} - -static struct device_state *__get_device_state(u16 devid) +static struct device_state *__get_device_state(u32 sbdf) { struct device_state *dev_state; list_for_each_entry(dev_state, &state_list, list) { - if (dev_state->devid == devid) + if (dev_state->sbdf == sbdf) return dev_state; } return NULL; } -static struct device_state *get_device_state(u16 devid) +static struct device_state *get_device_state(u32 sbdf) { struct device_state *dev_state; unsigned long flags; spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state != NULL) atomic_inc(&dev_state->count); spin_unlock_irqrestore(&state_lock, flags); @@ -520,15 +510,16 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) unsigned long flags; struct fault *fault; bool finish; - u16 tag, devid; + u16 tag, devid, seg_id; int ret; iommu_fault = data; tag = iommu_fault->tag & 0x1ff; finish = (iommu_fault->tag >> 9) & 1; - devid = iommu_fault->device_id; - pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(devid), + seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf); + devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf); + pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid), devid & 0xff); if (!pdev) return -ENODEV; @@ -542,7 +533,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) goto out; } - dev_state = get_device_state(iommu_fault->device_id); + dev_state = get_device_state(iommu_fault->sbdf); if (dev_state == NULL) goto out; @@ -602,7 +593,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, struct pasid_state *pasid_state; struct device_state *dev_state; struct mm_struct *mm; - u16 devid; + u32 sbdf; int ret; might_sleep(); @@ -610,8 +601,8 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); - dev_state = get_device_state(devid); + sbdf = get_pci_sbdf_id(pdev); + dev_state = get_device_state(sbdf); if (dev_state == NULL) return -EINVAL; @@ -685,15 +676,15 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid) { struct pasid_state *pasid_state; struct device_state *dev_state; - u16 devid; + u32 sbdf; might_sleep(); if (!amd_iommu_v2_supported()) return; - devid = device_id(pdev); - dev_state = get_device_state(devid); + sbdf = get_pci_sbdf_id(pdev); + dev_state = get_device_state(sbdf); if (dev_state == NULL) return; @@ -735,7 +726,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) struct iommu_group *group; unsigned long flags; int ret, tmp; - u16 devid; + u32 sbdf; might_sleep(); @@ -752,7 +743,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) if (pasids <= 0 || pasids > (PASID_MASK + 1)) return -EINVAL; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); if (dev_state == NULL) @@ -761,7 +752,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_init(&dev_state->lock); init_waitqueue_head(&dev_state->wq); dev_state->pdev = pdev; - dev_state->devid = devid; + dev_state->sbdf = sbdf; tmp = pasids; for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) @@ -799,7 +790,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) spin_lock_irqsave(&state_lock, flags); - if (__get_device_state(devid) != NULL) { + if (__get_device_state(sbdf) != NULL) { spin_unlock_irqrestore(&state_lock, flags); ret = -EBUSY; goto out_free_domain; @@ -831,16 +822,16 @@ void amd_iommu_free_device(struct pci_dev *pdev) { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; if (!amd_iommu_v2_supported()) return; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) { spin_unlock_irqrestore(&state_lock, flags); return; @@ -868,18 +859,18 @@ int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; int ret; if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) goto out_unlock; @@ -899,18 +890,18 @@ int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, { struct device_state *dev_state; unsigned long flags; - u16 devid; + u32 sbdf; int ret; if (!amd_iommu_v2_supported()) return -ENODEV; - devid = device_id(pdev); + sbdf = get_pci_sbdf_id(pdev); spin_lock_irqsave(&state_lock, flags); ret = -EINVAL; - dev_state = __get_device_state(devid); + dev_state = __get_device_state(sbdf); if (dev_state == NULL) goto out_unlock; diff --git a/drivers/iommu/amd/quirks.c b/drivers/iommu/amd/quirks.c index 5120ce4fdce32..79dbb8f33b473 100644 --- a/drivers/iommu/amd/quirks.c +++ b/drivers/iommu/amd/quirks.c @@ -15,7 +15,7 @@ struct ivrs_quirk_entry { u8 id; - u16 devid; + u32 devid; }; enum { @@ -49,7 +49,7 @@ static int __init ivrs_ioapic_quirk_cb(const struct dmi_system_id *d) const struct ivrs_quirk_entry *i; for (i = d->driver_data; i->id != 0 && i->devid != 0; i++) - add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u16 *)&i->devid, 0); + add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u32 *)&i->devid, 0); return 0; }