diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2f8f66100ea0f7..671ab0dd746d31 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5569,8 +5569,10 @@
 			it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
 
 	swiotlb=	[ARM,IA-64,PPC,MIPS,X86]
-			Format: { <int> | force | noforce }
+			Format: { <int>[,<int>] | force | noforce }
 			<int> -- Number of I/O TLB slabs
+			<int> -- Second integer after comma. Number of swiotlb
+				 areas with their own lock. Must be a power of 2.
 			force -- force using of bounce buffers even if they
 				 wouldn't be automatically used by the kernel
 			noforce -- Never use bounce buffers (for debugging)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index af8e01f28e4325..fa9178a6c58804 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <linux/swiotlb.h>
 #include
 #include
@@ -1129,6 +1130,9 @@ static int __init acpi_parse_madt_lapic_entries(void)
 		return count;
 	}
 
+	/* This does not take overrides into consideration */
+	swiotlb_hint_cpus(max(count, x2count));
+
 	x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
 					acpi_parse_x2apic_nmi, 0);
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 0b7fb28cde0d46..e12a92564bafc9 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -38,6 +38,7 @@ enum swiotlb_force {
 
 extern void swiotlb_init(int verbose);
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
+void swiotlb_hint_cpus(int cpus);
 unsigned long swiotlb_size_or_default(void);
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
 extern int swiotlb_late_init_with_default_size(size_t default_size);
@@ -70,7 +71,25 @@ struct io_tlb_slot {
 };
 
 /**
- * struct io_tlb_mem - IO TLB Memory Pool Descriptor
+ * struct io_tlb_area - IO TLB memory area descriptor
+ *
+ * This is a single area with a single lock.
+ *
+ * @used: The number of used IO TLB blocks.
+ * @free_slots: The free list of IO TLB slots available in this
+ *	area.
+ * @lock: The lock to protect the above data structures in the map and
+ *	unmap calls.
+ */
+
+struct io_tlb_area {
+	unsigned long used;
+	struct list_head free_slots;
+	spinlock_t lock;
+};
+
+/**
+ * struct io_tlb_mem - io tlb memory pool descriptor
  *
  * @start: The start address of the swiotlb memory pool. Used to do a quick
  *	range check to see if the memory was in fact allocated by this
@@ -85,8 +104,6 @@ struct io_tlb_slot {
  * @index: The index to start searching in the next round.
  * @orig_addr: The original address corresponding to a mapped entry.
  * @alloc_size: Size of the allocated buffer.
- * @lock: The lock to protect the above data structures in the map and
- *	unmap calls.
 * @debugfs: The dentry to debugfs.
 * @late_alloc: %true if allocated using the page allocator
 * @force_bounce: %true if swiotlb bouncing is forced
@@ -98,13 +115,11 @@ struct io_tlb_mem {
 	phys_addr_t start;
 	phys_addr_t end;
 	unsigned long nslabs;
-	unsigned long used;
-	struct list_head free_slots;
-	spinlock_t lock;
 	struct dentry *debugfs;
 	bool late_alloc;
 	bool force_bounce;
 	bool for_alloc;
+	struct io_tlb_area *areas;
 	struct io_tlb_slot *slots;
 	unsigned long *bitmap;
 };
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index e11cf11688d935..429ee78bf6002c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -68,6 +68,8 @@
 
 #define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 
+#define NUM_AREAS_DEFAULT 1
+
 enum swiotlb_force swiotlb_force;
 
 struct io_tlb_mem io_tlb_default_mem;
@@ -80,14 +82,60 @@ static unsigned int max_segment;
 
 static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
 
+static __read_mostly unsigned int area_index_shift;
+static __read_mostly unsigned int area_bits;
+static __read_mostly int num_areas = NUM_AREAS_DEFAULT;
+
+static __init int setup_areas(int n)
+{
+	unsigned long nslabs;
+
+	if (n < 1 || !is_power_of_2(n)) {
+		pr_err("swiotlb: Invalid areas parameter %d\n", n);
+		return -EINVAL;
+	}
+
+	/* Round up number of slabs to the next power of 2.
+	 * The last area is going to be smaller than the rest if
+	 * default_nslabs is not a power of two.
+	 */
+	nslabs = roundup_pow_of_two(default_nslabs);
+
+	pr_info("swiotlb: Using %d areas\n", n);
+	num_areas = n;
+	area_index_shift = __fls(nslabs) - __fls(num_areas);
+	area_bits = __fls(n);
+	return 0;
+}
+
+/*
+ * Can be called from architecture-specific code when swiotlb is set up
+ * before the possible CPUs are set up.
+ */
+
+void __init swiotlb_hint_cpus(int cpus)
+{
+	if (num_areas == NUM_AREAS_DEFAULT && cpus > 1) {
+		if (!is_power_of_2(cpus))
+			cpus = 1U << (__fls(cpus) + 1);
+		setup_areas(cpus);
+	}
+}
+
 static int __init
 setup_io_tlb_npages(char *str)
 {
+	int ret = 0;
+
 	if (isdigit(*str)) {
 		/* avoid tail segment of size < IO_TLB_SEGSIZE */
 		default_nslabs =
 			ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE);
 	}
+	if (*str == ',')
+		++str;
+	if (isdigit(*str))
+		ret = setup_areas(simple_strtoul(str, &str, 0));
 	if (*str == ',')
 		++str;
 	if (!strcmp(str, "force"))
@@ -95,7 +143,7 @@ setup_io_tlb_npages(char *str)
 	else if (!strcmp(str, "noforce"))
 		swiotlb_force = SWIOTLB_NO_FORCE;
 
-	return 0;
+	return ret;
 }
 early_param("swiotlb", setup_io_tlb_npages);
 
@@ -129,6 +177,7 @@ void __init swiotlb_adjust_size(unsigned long size)
 		return;
 	size = ALIGN(size, IO_TLB_SIZE);
 	default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+	setup_areas(num_areas);
 	pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
 }
 
@@ -184,18 +233,22 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 	mem->nslabs = nslabs;
 	mem->start = start;
 	mem->end = mem->start + bytes;
-	INIT_LIST_HEAD(&mem->free_slots);
 	mem->late_alloc = late_alloc;
 
 	if (swiotlb_force == SWIOTLB_FORCE)
 		mem->force_bounce = true;
 
-	spin_lock_init(&mem->lock);
+	for (i = 0; i < num_areas; i++) {
+		INIT_LIST_HEAD(&mem->areas[i].free_slots);
+		spin_lock_init(&mem->areas[i].lock);
+	}
 	for (i = 0; i < mem->nslabs; i++) {
+		int aindex = area_index_shift ?
+				i >> area_index_shift : 0;
 		__set_bit(i, mem->bitmap);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
-		list_add_tail(&mem->slots[i].node, &mem->free_slots);
+		list_add_tail(&mem->slots[i].node,
+			      &mem->areas[aindex].free_slots);
 	}
 	memset(vaddr, 0, bytes);
 }
@@ -217,6 +270,10 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	if (!mem->slots)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
+	mem->areas = memblock_alloc(sizeof(struct io_tlb_area) * num_areas,
+				    SMP_CACHE_BYTES);
+	if (!mem->areas)
+		panic("Cannot allocate io_tlb_areas");
 	mem->bitmap = memblock_alloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE),
 				     SMP_CACHE_BYTES);
 	if (!mem->bitmap)
@@ -311,6 +368,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
 	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long bytes = nslabs << IO_TLB_SHIFT;
+	int order;
 
 	if (swiotlb_force == SWIOTLB_NO_FORCE)
 		return 0;
@@ -320,14 +378,22 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 		return -ENOMEM;
 
 	mem->bitmap = kzalloc(DIV_ROUND_UP(nslabs, BITS_PER_BYTE), GFP_KERNEL);
-	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-		get_order(array_size(sizeof(*mem->slots), nslabs)));
+	order = get_order(array_size(sizeof(*mem->slots), nslabs));
+	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
 	if (!mem->slots || !mem->bitmap) {
 		kfree(mem->bitmap);
 		kfree(mem->slots);
 		return -ENOMEM;
 	}
 
+	mem->areas = (struct io_tlb_area *)kcalloc(num_areas,
+						   sizeof(struct io_tlb_area),
+						   GFP_KERNEL);
+	if (!mem->areas) {
+		free_pages((unsigned long)mem->slots, order);
+		return -ENOMEM;
+	}
+
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
 
@@ -352,9 +418,12 @@ void __init swiotlb_exit(void)
 	set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
 	if (mem->late_alloc) {
+		kfree(mem->areas);
 		free_pages(tbl_vaddr, get_order(tbl_size));
 		free_pages((unsigned long)mem->slots, get_order(slots_size));
 	} else {
+		memblock_free_late(__pa(mem->areas),
+				   num_areas * sizeof(struct io_tlb_area));
 		memblock_free_late(mem->start, tbl_size);
 		memblock_free_late(__pa(mem->slots), slots_size);
 	}
 
@@ -457,14 +526,32 @@ static inline unsigned long get_max_slots(unsigned long boundary_mask)
 	return nr_slots(boundary_mask + 1);
 }
 
+static inline unsigned long area_nslabs(struct io_tlb_mem *mem)
+{
+	return area_index_shift ? (1UL << area_index_shift) + 1 :
+		mem->nslabs;
+}
+
+static inline unsigned int area_start(int aindex)
+{
+	return aindex << area_index_shift;
+}
+
+static inline unsigned int area_end(struct io_tlb_mem *mem, int aindex)
+{
+	return area_start(aindex) + area_nslabs(mem);
+}
+
 /*
  * Find a suitable number of IO TLB entries size that will fit this request and
  * allocate a buffer from that IO TLB pool.
  */
-static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
-			      size_t alloc_size)
+static int swiotlb_do_find_slots(struct io_tlb_mem *mem,
+				 struct io_tlb_area *area,
+				 int area_index,
+				 struct device *dev, phys_addr_t orig_addr,
+				 size_t alloc_size)
 {
-	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	struct io_tlb_slot *slot, *tmp;
 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
 	dma_addr_t tbl_dma_addr =
@@ -478,12 +565,13 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	unsigned long flags;
 
 	BUG_ON(!nslots);
+	BUG_ON(area_index >= num_areas);
 
-	spin_lock_irqsave(&mem->lock, flags);
-	if (unlikely(nslots > mem->nslabs - mem->used))
+	spin_lock_irqsave(&area->lock, flags);
+	if (unlikely(nslots > area_nslabs(mem) - area->used))
 		goto not_found;
 
-	list_for_each_entry_safe(slot, tmp, &mem->free_slots, node) {
+	list_for_each_entry_safe(slot, tmp, &area->free_slots, node) {
 		index = slot - mem->slots;
 		if (orig_addr &&
 		    (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
@@ -516,7 +604,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	}
 
 not_found:
-	spin_unlock_irqrestore(&mem->lock, flags);
+	spin_unlock_irqrestore(&area->lock, flags);
 	return -1;
 
 found:
@@ -527,12 +615,42 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 		list_del(&mem->slots[i].node);
 	}
 
-	mem->used += nslots;
+	area->used += nslots;
 
-	spin_unlock_irqrestore(&mem->lock, flags);
+	spin_unlock_irqrestore(&area->lock, flags);
 	return index;
 }
 
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+			      size_t alloc_size)
+{
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	int start = raw_smp_processor_id() & ((1U << area_bits) - 1);
+	int i, index;
+
+	i = start;
+	do {
+		index = swiotlb_do_find_slots(mem, mem->areas + i, i,
+					      dev, orig_addr, alloc_size);
+		if (index >= 0)
+			return index;
+		if (++i >= num_areas)
+			i = 0;
+	} while (i != start);
+	return -1;
+}
+
+/* Somewhat racy estimate */
+static unsigned long mem_used(struct io_tlb_mem *mem)
+{
+	int i;
+	unsigned long used = 0;
+
+	for (i = 0; i < num_areas; i++)
+		used += mem->areas[i].used;
+	return used;
+}
+
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
@@ -560,7 +678,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		if (!(attrs & DMA_ATTR_NO_WARN))
 			dev_warn_ratelimited(dev,
 	"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-				 alloc_size, mem->nslabs, mem->used);
+				 alloc_size, mem->nslabs, mem_used(mem));
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
@@ -585,9 +703,14 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
 	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
 	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
+	int aindex = area_index_shift ? index >> area_index_shift : 0;
+	struct io_tlb_area *area = mem->areas + aindex;
 	int i;
 
-	spin_lock_irqsave(&mem->lock, flags);
+	BUG_ON(aindex >= num_areas);
+
+	spin_lock_irqsave(&area->lock, flags);
+
 	/*
 	 * Return the slots to swiotlb, updating bitmap to indicate
 	 * corresponding entries are free.
@@ -596,11 +719,11 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 		__set_bit(i, mem->bitmap);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
-		list_add(&mem->slots[i].node, &mem->free_slots);
+		list_add(&mem->slots[i].node, &area->free_slots);
 	}
 
-	mem->used -= nslots;
-	spin_unlock_irqrestore(&mem->lock, flags);
+	area->used -= nslots;
+	spin_unlock_irqrestore(&area->lock, flags);
 }
 
 /*
@@ -681,17 +804,27 @@ bool is_swiotlb_active(struct device *dev)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 
-	return mem && mem->nslabs;
+	return mem && mem->start != mem->end;
 }
 EXPORT_SYMBOL_GPL(is_swiotlb_active);
 
 #ifdef CONFIG_DEBUG_FS
+
+static int used_get(void *data, u64 *val)
+{
+	struct io_tlb_mem *mem = (struct io_tlb_mem *)data;
+	*val = mem_used(mem);
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(used_fops, used_get, NULL, "%llu\n");
+
 static struct dentry *debugfs_dir;
 
 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem)
{
 	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
-	debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+	debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem, &used_fops);
 }
 
 static int __init swiotlb_create_default_debugfs(void)
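
Usage note (illustration only, not part of the patch): with the extended format documented above, the number of areas is the second comma-separated integer of the swiotlb= parameter and must be a power of 2. The slab counts below are example values; with the usual 2KB IO TLB slab size, 32768 slabs correspond to the 64MB default pool:

    swiotlb=32768,8          32768 slabs (the 64MB default), split into 8 areas
    swiotlb=262144,32,force  512MB pool, 32 areas, bouncing forced

When no area count is given on x86, swiotlb_hint_cpus() (called from the MADT parsing above) derives one from the number of CPUs, rounded up to a power of 2. The per-area counters are still summed by mem_used(), so the debugfs file keeps reporting the total, e.g. via cat /sys/kernel/debug/swiotlb/io_tlb_used under the default debugfs mount.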
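To make the area geometry easier to follow, here is a standalone user-space sketch of the index arithmetic introduced above. It is not kernel code and not part of the patch: __fls() is reimplemented locally, the slab and area counts are made-up example values, and the "nominal slabs per area" is simply nslabs / num_areas rather than the patch's area_nslabs() helper.

#include <stdio.h>

/* local stand-in for the kernel's __fls(): index of the highest set bit */
static unsigned int fls_ul(unsigned long x)
{
	unsigned int r = 0;

	while (x >>= 1)
		r++;
	return r;
}

int main(void)
{
	/* example values: 32768 slabs (64MB with 2KB slabs), swiotlb=32768,8 */
	unsigned long nslabs = 32768;
	int num_areas = 8;
	unsigned int area_index_shift, area_bits;
	unsigned long slot = 20000;
	int cpu = 13;

	/* mirrors setup_areas(): both values are powers of 2 here */
	area_index_shift = fls_ul(nslabs) - fls_ul(num_areas);
	area_bits = fls_ul(num_areas);

	printf("nominal slabs per area: %lu\n", 1UL << area_index_shift);

	/* slot index -> owning area, as in swiotlb_release_slots() */
	printf("slot %lu is in area %lu\n", slot, slot >> area_index_shift);

	/* CPU -> area where swiotlb_find_slots() starts its round-robin search */
	printf("cpu %d starts in area %d\n", cpu, cpu & ((1 << area_bits) - 1));

	return 0;
}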