diff --git a/klib/cloud_init.c b/klib/cloud_init.c index 075a08756..31de281d7 100644 --- a/klib/cloud_init.c +++ b/klib/cloud_init.c @@ -82,7 +82,7 @@ static enum cloud cloud_detect(void) static int cloud_init_parse_vector(value config, int (*parse_each)(tuple, vector), vector tasks) { - if (!(is_tuple(config) || is_vector(config))) + if (!is_composite(config)) return KLIB_INIT_FAILED; /* allow parsing either tuple or vector for backward compatibility with older ops/tfs... */ value v; diff --git a/klib/firewall.c b/klib/firewall.c index 617f4390c..7b411bede 100644 --- a/klib/firewall.c +++ b/klib/firewall.c @@ -555,7 +555,7 @@ int init(status_handler complete) value rules = get(config, sym(rules)); if (!rules) return KLIB_INIT_OK; - if (!(is_tuple(rules) || is_vector(rules))) { + if (!is_composite(rules)) { rprintf("invalid firewall rules\n"); return KLIB_INIT_FAILED; } diff --git a/platform/pc/service.c b/platform/pc/service.c index f774749fd..58cfa5c52 100644 --- a/platform/pc/service.c +++ b/platform/pc/service.c @@ -243,16 +243,6 @@ static void __attribute__((noinline)) init_service_new_stack() { kernel_heaps kh = get_kernel_heaps(); early_init_debug("in init_service_new_stack"); - bytes pagesize = is_low_memory_machine() ? PAGESIZE : PAGESIZE_2M; - init_integers(locking_heap_wrapper(heap_general(kh), - allocate_tagged_region(kh, tag_integer, pagesize))); - init_tuples(locking_heap_wrapper(heap_general(kh), - allocate_tagged_region(kh, tag_table_tuple, pagesize))); - init_symbols(allocate_tagged_region(kh, tag_symbol, pagesize), heap_locked(kh)); - heap vh = allocate_tagged_region(kh, tag_vector, pagesize); - init_vectors(locking_heap_wrapper(heap_general(kh), vh), heap_locked(kh)); - heap sh = allocate_tagged_region(kh, tag_string, pagesize); - init_strings(locking_heap_wrapper(heap_general(kh), sh), heap_locked(kh)); for_regions(e) { switch (e->type) { @@ -265,7 +255,6 @@ static void __attribute__((noinline)) init_service_new_stack() } } - init_management(allocate_tagged_region(kh, tag_function_tuple, pagesize), heap_general(kh)); early_init_debug("init_hwrand"); init_hwrand(); @@ -332,15 +321,10 @@ id_heap init_physical_id_heap(heap h) early_init_debug("physical memory:"); for_regions(e) { if (e->type == REGION_PHYSICAL) { - /* Align for 2M pages */ u64 base = e->base; - u64 end = base + e->length; - u64 page2m_mask = MASK(PAGELOG_2M); - base = (base + page2m_mask) & ~page2m_mask; - end &= ~MASK(PAGELOG); - if (base >= end) + u64 length = e->length; + if (length == 0) continue; - u64 length = end - base; #ifdef INIT_DEBUG early_debug("INIT: ["); early_debug_u64(base); @@ -405,26 +389,6 @@ static inline void jump_to_virtual(void) { 1: \n"); } -static void cmdline_parse(const char *cmdline) -{ - early_init_debug("parsing cmdline"); - const char *opt_end, *prefix_end; - while (*cmdline) { - opt_end = runtime_strchr(cmdline, ' '); - if (!opt_end) - opt_end = cmdline + runtime_strlen(cmdline); - prefix_end = runtime_strchr(cmdline, '.'); - if (prefix_end && (prefix_end < opt_end)) { - int prefix_len = prefix_end - cmdline; - if ((prefix_len == sizeof("virtio_mmio") - 1) && - !runtime_memcmp(cmdline, "virtio_mmio", prefix_len)) - virtio_mmio_parse(get_kernel_heaps(), prefix_end + 1, - opt_end - (prefix_end + 1)); - } - cmdline = opt_end + 1; - } -} - extern void *READONLY_END; /* Returns the number of pages have been allocated from the temporary page table region. 
*/ @@ -495,7 +459,6 @@ void init_service(u64 rdi, u64 rsi) */ assert(u64_from_pointer(params) + cmdline_size < MBR_ADDRESS); runtime_memcpy(params, cmdline, cmdline_size); - params[cmdline_size] = '\0'; cmdline = (char *)params; } @@ -512,7 +475,7 @@ void init_service(u64 rdi, u64 rsi) init_page_initial_map(pointer_from_u64(PAGES_BASE), initial_pages); init_kernel_heaps(); if (cmdline) - cmdline_parse(cmdline); + create_region(u64_from_pointer(cmdline), cmdline_size, REGION_CMDLINE); u64 stack_size = 32*PAGESIZE; u64 stack_location = allocate_u64((heap)heap_page_backed(get_kernel_heaps()), stack_size); stack_location += stack_size - STACK_ALIGNMENT; @@ -623,3 +586,23 @@ void detect_devices(kernel_heaps kh, storage_attach sa) init_virtio_balloon(kh); init_virtio_rng(kh); } + +void cmdline_consume(const char *opt_name, cmdline_handler h) +{ + for_regions(e) { + if (e->type == REGION_CMDLINE) { + e->length = cmdline_parse(pointer_from_u64(e->base), e->length, opt_name, h); + return; + } + } +} + +void boot_params_apply(tuple t) +{ + for_regions(e) { + if (e->type == REGION_CMDLINE) { + cmdline_apply(pointer_from_u64(e->base), e->length, t); + return; + } + } +} diff --git a/platform/riscv-virt/service.c b/platform/riscv-virt/service.c index e3bb30850..e2e8ce95d 100644 --- a/platform/riscv-virt/service.c +++ b/platform/riscv-virt/service.c @@ -47,7 +47,7 @@ id_heap init_physical_id_heap(heap h) u64 end = PHYSMEM_BASE + mem_size; u64 bootstrap_size = init_bootstrap_heap(end - base); map(BOOTSTRAP_BASE, base, bootstrap_size, pageflags_writable(pageflags_memory())); - base = pad(base + bootstrap_size, PAGESIZE_2M); + base += bootstrap_size; init_debug("\nfree base "); init_debug_u64(base); init_debug("\nend "); @@ -102,17 +102,6 @@ static void __attribute__((noinline)) init_service_new_stack(void) { init_debug("in init_service_new_stack\n"); kernel_heaps kh = get_kernel_heaps(); - bytes pagesize = is_low_memory_machine() ? 
PAGESIZE : PAGESIZE_2M; - init_integers(locking_heap_wrapper(heap_general(kh), - allocate_tagged_region(kh, tag_integer, pagesize))); - init_tuples(locking_heap_wrapper(heap_general(kh), - allocate_tagged_region(kh, tag_table_tuple, pagesize))); - init_symbols(allocate_tagged_region(kh, tag_symbol, pagesize), heap_locked(kh)); - heap vh = allocate_tagged_region(kh, tag_vector, pagesize); - init_vectors(locking_heap_wrapper(heap_general(kh), vh), heap_locked(kh)); - heap sh = allocate_tagged_region(kh, tag_string, pagesize); - init_strings(locking_heap_wrapper(heap_general(kh), sh), heap_locked(kh)); - init_management(allocate_tagged_region(kh, tag_function_tuple, pagesize), heap_general(kh)); init_debug("calling runtime init\n"); kernel_runtime_init(kh); while(1); diff --git a/platform/virt/service.c b/platform/virt/service.c index 0696d7fb8..5d0dc4ec9 100644 --- a/platform/virt/service.c +++ b/platform/virt/service.c @@ -110,9 +110,6 @@ static void add_heap_range_internal(id_heap h, range r, range *remainder) r = *remainder; *remainder = tmp; } - r.start = pad(r.start, PAGESIZE_2M); - if (r.start >= r.end) - return; init_debug("adding range [0x"); init_debug_u64(r.start); init_debug(" 0x"); @@ -198,7 +195,7 @@ id_heap init_physical_id_heap(heap h) u64 end = base + get_memory_size(pointer_from_u64(DEVICETREE_BLOB_BASE)); u64 bootstrap_size = init_bootstrap_heap(end - base); map(BOOTSTRAP_BASE, base, bootstrap_size, pageflags_writable(pageflags_memory())); - base = pad(base + bootstrap_size, PAGESIZE_2M); + base += bootstrap_size; init_debug("\nfree base "); init_debug_u64(base); init_debug("\nend "); @@ -247,17 +244,6 @@ static void __attribute__((noinline)) init_service_new_stack(void) { init_debug("in init_service_new_stack\n"); kernel_heaps kh = get_kernel_heaps(); - bytes pagesize = is_low_memory_machine() ? 
PAGESIZE : PAGESIZE_2M; - init_integers(locking_heap_wrapper(heap_general(kh), - allocate_tagged_region(kh, tag_integer, pagesize))); - init_tuples(locking_heap_wrapper(heap_general(kh), - allocate_tagged_region(kh, tag_table_tuple, pagesize))); - init_symbols(allocate_tagged_region(kh, tag_symbol, pagesize), heap_locked(kh)); - heap vh = allocate_tagged_region(kh, tag_vector, pagesize); - init_vectors(locking_heap_wrapper(heap_general(kh), vh), heap_locked(kh)); - heap sh = allocate_tagged_region(kh, tag_string, pagesize); - init_strings(locking_heap_wrapper(heap_general(kh), sh), heap_locked(kh)); - init_management(allocate_tagged_region(kh, tag_function_tuple, pagesize), heap_general(kh)); init_debug("calling runtime init\n"); kernel_runtime_init(kh); while(1); diff --git a/src/aarch64/kernel_machine.c b/src/aarch64/kernel_machine.c index 54ec46eaa..fa71b4960 100644 --- a/src/aarch64/kernel_machine.c +++ b/src/aarch64/kernel_machine.c @@ -45,7 +45,7 @@ static u64 tag_alloc(heap h, bytes s) return a; } -heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize) +heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize, boolean locking) { heap h = heap_locked(kh); struct tagheap *th = allocate(h, sizeof(struct tagheap)); diff --git a/src/aarch64/kernel_machine.h b/src/aarch64/kernel_machine.h index 23425decf..8f7da0361 100644 --- a/src/aarch64/kernel_machine.h +++ b/src/aarch64/kernel_machine.h @@ -205,6 +205,9 @@ static inline void wait_for_interrupt(void) enable_interrupts(); } +#define cmdline_consume(o, h) (void)(h) +#define boot_params_apply(t) + /* locking constructs */ #include diff --git a/src/drivers/console.c b/src/drivers/console.c index 6236d4e76..697377f38 100644 --- a/src/drivers/console.c +++ b/src/drivers/console.c @@ -104,7 +104,7 @@ void config_console(tuple root) value v = get(root, sym(consoles)); if (v == 0) return; - if (!(is_vector(v) || is_tuple(v))) { + if (!is_composite(v)) { msg_err("consoles config is neither vector nor tuple\n"); return; } diff --git a/src/fs/tlog.c b/src/fs/tlog.c index 3a88fd090..5b1ea720b 100644 --- a/src/fs/tlog.c +++ b/src/fs/tlog.c @@ -499,7 +499,7 @@ closure_function(2, 1, void, log_switch_complete, if (is_ok(s)) table_foreach(old_tl->dictionary, k, v) { (void)v; - if ((is_tuple(k) || is_vector(k)) && !table_find(new_tl->dictionary, k)) { + if (is_composite(k) && !table_find(new_tl->dictionary, k)) { tlog_debug(" destroying value %p\n", __func__, k); destruct_value(k, false); } @@ -842,7 +842,7 @@ closure_function(4, 1, void, log_read_complete, table newdict = allocate_table(tl->h, identity_key, pointer_equal); table_foreach(tl->dictionary, k, v) { tlog_debug(" dict swap: k %p, v %p, type %d\n", k, v, tagof(v)); - assert(is_tuple(v) || is_symbol(v) || is_vector(v)); + assert(is_composite(v) || is_symbol(v)); table_set(newdict, v, k); } deallocate_table(tl->dictionary); diff --git a/src/kernel/init.c b/src/kernel/init.c index 2f1480b85..1422387c2 100644 --- a/src/kernel/init.c +++ b/src/kernel/init.c @@ -189,10 +189,12 @@ closure_function(2, 2, void, fsstarted, root_fs = fs; storage_set_root_fs(fs); - wrapped_root = tuple_notifier_wrap(filesystem_getroot(fs)); + tuple fs_root = filesystem_getroot(fs); + wrapped_root = tuple_notifier_wrap(fs_root, true); assert(wrapped_root != INVALID_ADDRESS); - // XXX use wrapped_root after root fs is separate - tuple root = filesystem_getroot(root_fs); + tuple root = (tuple)wrapped_root; + boot_params_apply(root); + reclaim_regions(); /* for pc: no accessing regions after this 
point */ tuple mounts = get_tuple(root, sym(mounts)); if (mounts) storage_set_mountpoints(mounts); @@ -227,10 +229,111 @@ closure_function(2, 2, void, fsstarted, closure_finish(); symbol booted = sym(booted); if (!get(root, booted)) - filesystem_write_eav((tfs)fs, root, booted, null_value, false); + filesystem_write_eav((tfs)fs, fs_root, booted, null_value, false); config_console(root); } +static char *cmdline_next_option(char *cmdline_start, int cmdline_len, int *opt_len) +{ + while (cmdline_len > 0) { + if (*cmdline_start == ' ') { + cmdline_start++; + cmdline_len--; + } else { + break; + } + } + char *opt_end = runtime_memchr(cmdline_start, ' ', cmdline_len); + if (!opt_end) + opt_end = cmdline_start + cmdline_len; + if (opt_end > cmdline_start) { + *opt_len = opt_end - cmdline_start; + return cmdline_start; + } + return 0; +} + +static int cmdline_get_prefix(char *opt_start, int opt_len) +{ + char *prefix_end = runtime_memchr(opt_start, '.', opt_len); + return (prefix_end ? (prefix_end - opt_start) : 0); +} + +/* Removes consumed options from command line; returns updated command line length. */ +int cmdline_parse(char *cmdline_start, int cmdline_len, const char *opt_name, cmdline_handler h) +{ + init_debug("%s (%d): option %s", __func__, cmdline_len, opt_name); + char *cmdline_end = cmdline_start + cmdline_len; + char *opt_start; + int opt_len; + int name_len = runtime_strlen(opt_name); + char *p = cmdline_start; + while ((opt_start = cmdline_next_option(p, cmdline_end - p, &opt_len))) { + char *opt_end = opt_start + opt_len; + int prefix_len = cmdline_get_prefix(opt_start, opt_len); + if ((prefix_len == name_len) && !runtime_memcmp(opt_start, opt_name, prefix_len)) { + char *value_start = opt_start + prefix_len + 1; + apply(h, value_start, opt_end - value_start); + + /* consume parsed option */ + runtime_memcpy(opt_start, opt_end, cmdline_end - opt_end); + cmdline_len -= opt_len; + cmdline_end -= opt_len; + p = opt_start; + } else { + p = opt_end; + } + } + init_debug("%s: updated length %d", __func__, cmdline_len); + return cmdline_len; +} + +static void cmdline_apply_option(char *opt_start, int opt_len, value cfg) +{ + char *name_end = runtime_memchr(opt_start, '=', opt_len); + if (!name_end) + return; + char *opt_end = opt_start + opt_len; + int prefix_len = cmdline_get_prefix(opt_start, opt_len); + if (prefix_len && (prefix_len < name_end - opt_start)) { + symbol s = intern(alloca_wrap_buffer(opt_start, prefix_len)); + value child = get(cfg, s); + if (!child) { + child = allocate_tuple(); + assert(child != INVALID_ADDRESS); + set(cfg, s, child); + } + if (is_composite(child)) { + char *value_start = opt_start + prefix_len + 1; + cmdline_apply_option(value_start, opt_end - value_start, child); + } + } else { + char *value_start = name_end + 1; + symbol name = intern(alloca_wrap_buffer(opt_start, name_end - opt_start)); + string val; + if (value_start < opt_end) { + val = allocate_string(opt_end - value_start); + assert(val != INVALID_ADDRESS); + buffer_append(val, value_start, opt_end - value_start); + } else { + val = 0; + } + set(cfg, name, val); + } +} + +void cmdline_apply(char *cmdline_start, int cmdline_len, tuple t) +{ + char *opt_start; + int opt_len; + while ((opt_start = cmdline_next_option(cmdline_start, cmdline_len, &opt_len))) { + cmdline_apply_option(opt_start, opt_len, t); + char *opt_end = opt_start + opt_len; + cmdline_len -= opt_end - cmdline_start; + cmdline_start = opt_end; + } +} + #ifdef MM_DEBUG #define mm_debug(x, ...) 
do {tprintf(sym(mm), 0, x, ##__VA_ARGS__);} while(0) #else @@ -331,12 +434,11 @@ filesystem get_root_fs(void) tuple get_root_tuple(void) { - return root_fs ? filesystem_getroot(root_fs) : 0; + return (tuple)wrapped_root; } void register_root_notify(symbol s, set_value_notify n) { - // XXX to be restored when root fs tuple is separated from root tuple tuple_notifier_register_set_notify(wrapped_root, s, n); } @@ -448,8 +550,17 @@ void kernel_runtime_init(kernel_heaps kh) { heap misc = heap_general(kh); heap locked = heap_locked(kh); + boolean lowmem = is_low_memory_machine(); init_heaps = kh; + bytes pagesize = lowmem ? PAGESIZE : PAGESIZE_2M; + init_integers(allocate_tagged_region(kh, tag_integer, pagesize, true)); + init_tuples(allocate_tagged_region(kh, tag_table_tuple, pagesize, true)); + init_symbols(allocate_tagged_region(kh, tag_symbol, pagesize, false), locked); + init_vectors(allocate_tagged_region(kh, tag_vector, pagesize, true), locked); + init_strings(allocate_tagged_region(kh, tag_string, pagesize, true), locked); + init_management(allocate_tagged_region(kh, tag_function_tuple, pagesize, true), locked); + /* runtime and console init */ #ifdef INIT_DEBUG early_debug("kernel_runtime_init\n"); @@ -460,8 +571,7 @@ void kernel_runtime_init(kernel_heaps kh) dma_init(kh); list_init(&mm_cleaners); spin_lock_init(&mm_lock); - u64 memory_reserve = is_low_memory_machine() ? PAGECACHE_LOWMEM_MEMORY_RESERVE : - PAGECACHE_MEMORY_RESERVE; + u64 memory_reserve = lowmem ? PAGECACHE_LOWMEM_MEMORY_RESERVE : PAGECACHE_MEMORY_RESERVE; init_pagecache(locked, reserve_heap_wrapper(misc, (heap)heap_page_backed(kh), memory_reserve), reserve_heap_wrapper(misc, (heap)heap_physical(kh), memory_reserve), PAGESIZE); mem_cleaner pc_cleaner = closure(misc, mm_pagecache_cleaner); @@ -474,7 +584,6 @@ void kernel_runtime_init(kernel_heaps kh) init_platform_devices(kh); init_symtab(kh); read_kernel_syms(); - reclaim_regions(); /* for pc: no accessing regions after this point */ shutdown_completions = allocate_vector(locked, SHUTDOWN_COMPLETIONS_SIZE); init_debug("init_kernel_contexts"); @@ -605,7 +714,7 @@ static boolean vm_exit_match(u8 exit_code, tuple config, symbol option, boolean return (((action_code == exit_code) && !neq) || ((action_code != exit_code) && neq)); } - } else if (is_vector(config_option)) { + } else if (is_composite(config_option)) { for (int i = 0; get_u64(config_option, intern_u64(i), &action_code); i++) { if (action_code == exit_code) return true; diff --git a/src/kernel/kernel.h b/src/kernel/kernel.h index fd00f0c27..9a915d559 100644 --- a/src/kernel/kernel.h +++ b/src/kernel/kernel.h @@ -5,6 +5,8 @@ #include #endif +typedef closure_type(cmdline_handler, void, const char *, int); + #ifdef KERNEL void runloop_target(void) __attribute__((noreturn)); #endif @@ -751,6 +753,9 @@ void kernel_runtime_init(kernel_heaps kh); void read_kernel_syms(void); void reclaim_regions(void); +int cmdline_parse(char *cmdline_start, int cmdline_len, const char *opt_name, cmdline_handler h); +void cmdline_apply(char *cmdline_start, int cmdline_len, tuple t); + boolean breakpoint_insert(heap h, u64 a, u8 type, u8 length, thunk completion); boolean breakpoint_remove(heap h, u32 a, thunk completion); void destruct_context(context c); @@ -798,7 +803,7 @@ void unmap_and_free_phys(u64 virtual, u64 length); #if !defined(BOOT) -heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize); +heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize, boolean locking); heap locking_heap_wrapper(heap meta, 
heap parent); #endif diff --git a/src/kernel/klib.c b/src/kernel/klib.c index 9bb6ace22..d0c1e2983 100644 --- a/src/kernel/klib.c +++ b/src/kernel/klib.c @@ -296,7 +296,7 @@ void init_klib(kernel_heaps kh, void *fs, tuple config_root, status_handler comp heap h = heap_locked(kh); klib_kh = kh; klib_fs = (filesystem)fs; - klib_loaded = allocate_vector(heap_general(kh), 4); + klib_loaded = allocate_vector(h, 4); assert(klib_loaded != INVALID_ADDRESS); extern u8 END; diff --git a/src/kernel/region.h b/src/kernel/region.h index b7a7b90c3..e8c9bfc9c 100644 --- a/src/kernel/region.h +++ b/src/kernel/region.h @@ -13,6 +13,7 @@ typedef struct region *region; #define REGION_PHYSICAL 1 /* available physical memory */ #define REGION_DEVICE 2 /* e820 physical region configured for i/o */ #define REGION_INITIAL_PAGES 10 /* for page table allocations in stage2 and early stage3 */ +#define REGION_CMDLINE 11 /* kernel command line */ #define REGION_FILESYSTEM 12 /* offset on disk for the filesystem, see if we can get disk info from the bios */ #define REGION_KERNIMAGE 13 /* location of kernel elf image loaded by stage2 */ #define REGION_RECLAIM 14 /* areas to be unmapped and reclaimed in stage3 (only stage2 stack presently) */ diff --git a/src/kernel/stage3.c b/src/kernel/stage3.c index 3d7fc1644..7bfb966ba 100644 --- a/src/kernel/stage3.c +++ b/src/kernel/stage3.c @@ -90,7 +90,6 @@ static void init_kernel_heaps_management(tuple root) set(heaps, sym(physical), heap_management((heap)heap_physical(kh))); set(heaps, sym(general), heap_management((heap)heap_general(kh))); set(heaps, sym(locked), heap_management((heap)heap_locked(kh))); - set(heaps, sym(no_encode), null_value); set(root, sym(heaps), heaps); } @@ -135,7 +134,7 @@ closure_function(6, 0, void, startup, filesystem_set_readonly(fs); value p = get(root, sym(program)); assert(p && is_string(p)); - tuple pro = resolve_path(root, split(general, p, '/')); + tuple pro = resolve_path(filesystem_getroot(fs), split(general, p, '/')); if (!pro) halt("unable to resolve program path \"%b\"\n", p); program_set_perms(root, pro); diff --git a/src/kernel/storage.c b/src/kernel/storage.c index 5357fd475..59d964590 100644 --- a/src/kernel/storage.c +++ b/src/kernel/storage.c @@ -40,7 +40,6 @@ static struct { static void notify_mount_change_locked(void); /* Called with mutex locked. */ -// XXX this won't work with wrapped root... static volume storage_get_volume(tuple root) { list_foreach(&storage.volumes, e) { diff --git a/src/kernel/tracelog.c b/src/kernel/tracelog.c index 09db7b385..437dfd368 100644 --- a/src/kernel/tracelog.c +++ b/src/kernel/tracelog.c @@ -155,7 +155,7 @@ closure_function(1, 2, boolean, match_attrs, value tv = get(bound(attrs), a); if (!tv) return false; - if (is_tuple(v) || is_vector(v)) { + if (is_composite(v)) { /* We support either a single value for this attribute or a set of acceptable values (as a tuple-encoded vector). 
*/ boolean match = false; diff --git a/src/net/netsyscall.c b/src/net/netsyscall.c index de6d5a6f0..da434f156 100755 --- a/src/net/netsyscall.c +++ b/src/net/netsyscall.c @@ -2630,12 +2630,13 @@ boolean netsyscall_init(unix_heaps uh, tuple cfg) else so_rcvbuf = DEFAULT_SO_RCVBUF; kernel_heaps kh = (kernel_heaps)uh; - caching_heap socket_cache = allocate_objcache(heap_general(kh), (heap)heap_page_backed(kh), + heap h = heap_locked(kh); + caching_heap socket_cache = allocate_objcache(h, (heap)heap_page_backed(kh), sizeof(struct netsock), PAGESIZE, true); if (socket_cache == INVALID_ADDRESS) return false; uh->socket_cache = socket_cache; - net_loop_poll = closure(heap_general(kh), netsock_poll); + net_loop_poll = closure(h, netsock_poll); netlink_init(); vsock_init(); return true; diff --git a/src/riscv64/kernel_machine.c b/src/riscv64/kernel_machine.c index d72034173..744489e08 100644 --- a/src/riscv64/kernel_machine.c +++ b/src/riscv64/kernel_machine.c @@ -9,7 +9,7 @@ #define tag_debug(x, ...) #endif -heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize) +heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize, boolean locking) { heap h = heap_locked(kh); heap p = (heap)heap_physical(kh); @@ -25,7 +25,9 @@ heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize) /* reserve area in virtual_huge */ assert(id_heap_set_area(heap_virtual_huge(kh), tag_base, tag_length, true, true)); - return allocate_mcache(h, backed, 5, find_order(pagesize) - 1, pagesize, false); + heap mc = allocate_mcache(h, backed, 5, find_order(pagesize) - 1, pagesize, false); + assert(mc != INVALID_ADDRESS); + return locking ? locking_heap_wrapper(h, mc) : mc; } extern void *trap_handler; diff --git a/src/riscv64/kernel_machine.h b/src/riscv64/kernel_machine.h index 4b2f6b123..2209e1a07 100644 --- a/src/riscv64/kernel_machine.h +++ b/src/riscv64/kernel_machine.h @@ -135,6 +135,9 @@ static inline void wait_for_interrupt(void) disable_interrupts(); } +#define cmdline_consume(o, h) (void)(h) +#define boot_params_apply(t) + /* locking constructs */ #include diff --git a/src/runtime/bitmap.c b/src/runtime/bitmap.c index 6308ed2d8..d4eb0f616 100644 --- a/src/runtime/bitmap.c +++ b/src/runtime/bitmap.c @@ -1,7 +1,11 @@ /* bitmap allocator Allocations are aligned to next power-of-2 equal or greater than - allocation size. This allows a simpler and faster allocation + allocation size. For allocations done with bitmap_alloc_within_range(), + the above alignment rule applies to the offset from the start argument; + for optimal performance, the start argument should be a multiple of + BITMAP_WORDLEN for allocation size values greater than (BITMAP_WORDLEN / 2). + This allows a simpler and faster allocation algorithm, and also has the side-effect of allowing mixed page sizes (e.g. 4K + 2M) within the same space. 
@@ -11,10 +15,6 @@ #include -#define BITMAP_WORDLEN_LOG 6 -#define BITMAP_WORDLEN (1 << BITMAP_WORDLEN_LOG) -#define BITMAP_WORDMASK (BITMAP_WORDLEN - 1) - static inline u64 * pointer_from_bit(u64 * base, u64 bit) { return base + (bit >> BITMAP_WORDLEN_LOG); @@ -121,7 +121,7 @@ static inline u64 bitmap_alloc_internal(bitmap b, u64 nbits, u64 startbit, u64 e u64 stride = U64_FROM_BIT(order); endbit = MIN(endbit, b->maxbits); - u64 bit = pad(startbit, stride); + u64 bit = startbit; if (bit + nbits > endbit) return INVALID_PHYSICAL; @@ -190,13 +190,6 @@ boolean bitmap_dealloc(bitmap b, u64 bit, u64 size) u64 * mapbase = bitmap_base(b); assert(mapbase); - /* XXX maybe error code instead of msg_err... */ - if (bit & (size - 1)) { - msg_err("bitmap %p, bit %ld is not aligned to order %ld\n", - b, bit, order); - return false; - } - if (bit + size > b->maxbits) { msg_err("bitmap %p, bit %ld, order %ld: exceeds bit length %ld\n", b, bit, order, b->maxbits); diff --git a/src/runtime/bitmap.h b/src/runtime/bitmap.h index 44d6d550f..376ced018 100644 --- a/src/runtime/bitmap.h +++ b/src/runtime/bitmap.h @@ -1,3 +1,7 @@ +#define BITMAP_WORDLEN_LOG 6 +#define BITMAP_WORDLEN (1 << BITMAP_WORDLEN_LOG) +#define BITMAP_WORDMASK (BITMAP_WORDLEN - 1) + /* XXX keep allocs small for now; rolling heap allocations more than a page are b0rked */ #define ALLOC_EXTEND_BITS U64_FROM_BIT(12) diff --git a/src/runtime/extra_prints.c b/src/runtime/extra_prints.c index e11bcaf0b..658786ba4 100644 --- a/src/runtime/extra_prints.c +++ b/src/runtime/extra_prints.c @@ -158,7 +158,7 @@ static boolean is_binary_buffer(buffer b) static void print_value_internal(buffer dest, value v, table *visited, s32 indent, s32 depth) { - if (is_tuple(v) || is_vector(v)) { + if (is_composite(v)) { if (!*visited) { *visited = allocate_table(transient, identity_key, pointer_equal); assert(visited != INVALID_ADDRESS); diff --git a/src/runtime/heap/id.c b/src/runtime/heap/id.c index be56d805f..5c2d9a823 100644 --- a/src/runtime/heap/id.c +++ b/src/runtime/heap/id.c @@ -19,6 +19,7 @@ typedef struct id_range { struct rmnode n; /* range in pages */ bitmap b; + u64 bitmap_start; u64 next_bit[NEXT_BIT_COUNT]; /* for next-fit search */ } *id_range; @@ -47,7 +48,7 @@ static id_range id_add_range(id_heap i, u64 base, u64 length) if (length == infinity) { length -= base; /* bitmap will round up to next 64 page boundary, don't wrap */ - length &= ~((1ull << (page_order(i) + 6)) - 1); + length &= ~((1ull << (page_order(i) + BITMAP_WORDLEN_LOG)) - 1); } /* assert only page-sized and non-zero */ @@ -67,11 +68,23 @@ static id_range id_add_range(id_heap i, u64 base, u64 length) msg_err("%s: range insertion failure; conflict with range %R\n", __func__, ir->n.r); goto fail; } - ir->b = allocate_bitmap(i->meta, i->map, pages); + + /* To optimize performance of bitmap allocations, make the bitmap range start at a + * BITMAP_WORDLEN-aligned value, so that the start argument to bitmap_alloc_within_range() is + * always aligned to BITMAP_WORDLEN for allocation size values greater than + * (BITMAP_WORDLEN / 2). */ + u64 page_start_mask = page_start & BITMAP_WORDMASK; + + ir->b = allocate_bitmap(i->meta, i->map, pages + page_start_mask); if (ir->b == INVALID_ADDRESS) { msg_err("%s: failed to allocate bitmap for range %R\n", __func__, ir->n.r); goto fail; } + if (page_start_mask) + /* Mark the initial bits (which are not part of the range supplied to this function) as + * allocated, to prevent the bitmap from returning these bits during allocations. 
*/ + bitmap_range_check_and_set(ir->b, 0, page_start_mask, false, true); + ir->bitmap_start = page_start & ~BITMAP_WORDMASK; zero(ir->next_bit, sizeof(ir->next_bit)); i->total += length; id_debug("added range base 0x%lx, end 0x%lx (length 0x%lx)\n", base, base + length, length); @@ -109,8 +122,8 @@ static u64 id_alloc_from_range(id_heap i, id_range r, u64 pages, range subrange) /* align search start (but not end, for pages may fit in a non-power-of-2 remainder at the end of the range) and remove id_range offset */ - ri.start = pad(ri.start, pages_rounded) - r->n.r.start; - ri.end -= r->n.r.start; + ri.start = pad(ri.start, pages_rounded) - r->bitmap_start; + ri.end -= r->bitmap_start; id_debug("after adjust %R\n", ri); if (!range_valid(ri) || range_span(ri) < pages) { id_debug("range invalid %d, range_span(ri) %ld, pages %ld\n", @@ -140,7 +153,7 @@ static u64 id_alloc_from_range(id_heap i, id_range r, u64 pages, range subrange) set_next_bit(r, page_order, bit + pages_rounded); i->allocated += pages << page_order(i); - u64 result = (r->n.r.start + bit) << page_order(i); + u64 result = (r->bitmap_start + bit) << page_order(i); id_debug("allocated bit %ld, range page start %ld, returning 0x%lx\n", bit, r->n.r.start, result); return result; @@ -178,7 +191,7 @@ closure_function(2, 1, boolean, dealloc_from_range, range ri = range_intersection(q, n->r); id_range r = (id_range)n; - int bit = ri.start - n->r.start; + int bit = ri.start - r->bitmap_start; u64 pages = range_span(ri); int order = find_order(pages); if (!bitmap_dealloc(r->b, bit, pages)) { @@ -259,7 +272,7 @@ closure_function(3, 1, boolean, set_intersection, { range ri = range_intersection(bound(q), n->r); id_range r = (id_range)n; - int bit = ri.start - n->r.start; + int bit = ri.start - r->bitmap_start; if (!bitmap_range_check_and_set(r->b, bit, range_span(ri), bound(validate), bound(allocate))) return false; return true; @@ -342,7 +355,7 @@ static inline void set_next(id_heap i, bytes count, u64 next) rangemap_foreach(i->ranges, n) { id_range r = (id_range)n; if (point_in_range(r->n.r, next)) - set_next_bit(r, order, next - r->n.r.start); + set_next_bit(r, order, next - r->bitmap_start); else if (r->n.r.start > next) set_next_bit(r, order, 0); } @@ -440,7 +453,7 @@ static value id_management(heap h) symbol s; tuple t = timm("type", "id", "pagesize", "%d", i->h.pagesize); assert(t != INVALID_ADDRESS); - tuple_notifier n = tuple_notifier_wrap(t); + tuple_notifier n = tuple_notifier_wrap(t, false); assert(n != INVALID_ADDRESS); register_stat(i, n, t, allocated); register_stat(i, n, t, total); diff --git a/src/runtime/heap/mcache.c b/src/runtime/heap/mcache.c index 66844f9f9..82ad98b23 100644 --- a/src/runtime/heap/mcache.c +++ b/src/runtime/heap/mcache.c @@ -282,7 +282,7 @@ static value mcache_management(heap h) symbol s; tuple t = timm("type", "mcache", "pagesize", "%d", m->h.pagesize); assert(t != INVALID_ADDRESS); - tuple_notifier n = tuple_notifier_wrap(t); + tuple_notifier n = tuple_notifier_wrap(t, false); assert(n != INVALID_ADDRESS); register_stat(m, n, t, allocated); register_stat(m, n, t, total); diff --git a/src/runtime/heap/objcache.c b/src/runtime/heap/objcache.c index a0e500c49..dbd66b28e 100644 --- a/src/runtime/heap/objcache.c +++ b/src/runtime/heap/objcache.c @@ -439,7 +439,7 @@ static value objcache_management(heap h) symbol s; tuple t = timm("type", "objcache", "pagesize", "%d", object_size(o)); assert(t != INVALID_ADDRESS); - tuple_notifier n = tuple_notifier_wrap(t); + tuple_notifier n = tuple_notifier_wrap(t, 
false); assert(n != INVALID_ADDRESS); register_stat(o, n, t, allocated); register_stat(o, n, t, total); diff --git a/src/runtime/heap/reserve.c b/src/runtime/heap/reserve.c index e6f2d0c56..4a3a8637e 100644 --- a/src/runtime/heap/reserve.c +++ b/src/runtime/heap/reserve.c @@ -83,7 +83,7 @@ static value reservelock_management(heap h) symbol s; tuple t = timm("type", "reservelock", "pagesize", "%d", rl->h.pagesize); assert(t != INVALID_ADDRESS); - tuple_notifier n = tuple_notifier_wrap(t); + tuple_notifier n = tuple_notifier_wrap(t, false); assert(n != INVALID_ADDRESS); register_stat(rl, n, t, allocated); register_stat(rl, n, t, total); diff --git a/src/runtime/management.c b/src/runtime/management.c index 43e17d377..21b9de1f8 100644 --- a/src/runtime/management.c +++ b/src/runtime/management.c @@ -215,11 +215,29 @@ tuple allocate_function_tuple(tuple_get g, tuple_set s, tuple_iterate i) typedef struct tuple_notifier { struct function_tuple f; - tuple parent; + value parent; table get_notifys; /* get_value_notifys */ table set_notifys; /* set_value_notifys */ } *tuple_notifier; +typedef struct tuple_notifier_cow { + struct tuple_notifier tn; + value parent_copy; + struct spinlock lock; +} *tuple_notifier_cow; + +#ifdef KERNEL + +#define tn_cow_lock(tn) spin_lock(&(tn)->lock) +#define tn_cow_unlock(tn) spin_unlock(&(tn)->lock) + +#else + +#define tn_cow_lock(tn) +#define tn_cow_unlock(tn) + +#endif + /* It might be a shade more elegant to make a function-backed value to poll dynamic values rather than use a get notifier, and it would avoid the table lookup in get_notifys. There are some subtleties that need to be sorted @@ -247,6 +265,53 @@ closure_function(1, 1, value, tuple_notifier_get, return get(bound(tn)->parent, a); /* transparent */ } +static boolean tn_init_copy(tuple_notifier_cow tn) +{ + if (tn->parent_copy == INVALID_ADDRESS) { + if (is_tuple(tn->tn.parent)) + tn->parent_copy = allocate_tuple(); + else + tn->parent_copy = allocate_tagged_vector(vector_length(tn->tn.parent)); + if (tn->parent_copy == INVALID_ADDRESS) + return false; + } + return true; +} + +closure_function(1, 1, value, tuple_notifier_cow_get, + tuple_notifier_cow, tn, + value, a) +{ + symbol s; + tuple_notifier_cow tn_cow = bound(tn); + tuple_notifier tn = &tn_cow->tn; + get_value_notify n; + if (tn->get_notifys && ((s = sym_from_attribute(a)) && (n = table_find(tn->get_notifys, s)))) + return apply(n); + tn_cow_lock(tn_cow); + value v; + if (tn_cow->parent_copy != INVALID_ADDRESS) { + v = get(tn_cow->parent_copy, a); + if (v) + goto out; + } + v = get(tn->parent, a); + if (is_composite(v)) { + if (!tn_init_copy(tn_cow)) { + v = 0; + goto out; + } + v = tuple_notifier_wrap(v, true); + if (v != INVALID_ADDRESS) + set(tn_cow->parent_copy, a, v); + else + v = 0; + } + out: + tn_cow_unlock(tn_cow); + return v; +} + closure_function(1, 2, void, tuple_notifier_set, tuple_notifier, tn, value, a, value, v) @@ -262,6 +327,24 @@ closure_function(1, 2, void, tuple_notifier_set, set(bound(tn)->parent, a, v); } +closure_function(1, 2, void, tuple_notifier_cow_set, + tuple_notifier_cow, tn, + value, a, value, v) +{ + tuple_notifier_cow tn_cow = bound(tn); + tuple_notifier tn = &tn_cow->tn; + symbol s; + set_value_notify n; + if (tn->set_notifys && ((s = sym_from_attribute(a))) && (n = table_find(tn->set_notifys, s))) { + if (!apply(n, v)) + return; /* setting of value not allowed */ + } + tn_cow_lock(tn_cow); + if (tn_init_copy(tn_cow)) + set(tn_cow->parent_copy, a, v); + tn_cow_unlock(tn_cow); +} + closure_function(2, 2, 
boolean, tuple_notifier_iterate_each, tuple_notifier, tn, binding_handler, h, value, a, value, v) { @@ -279,12 +362,71 @@ closure_function(1, 1, boolean, tuple_notifier_iterate, binding_handler, h) { /* This assumes that all attributes of interest exist in the parent - tuple. Values that are served by get_notifys should still have - corresponding entries in the parent tuple if they are to be included in + value. Values that are served by get_notifys should still have + corresponding entries in the parent value if they are to be included in an iterate. */ return iterate(bound(tn)->parent, stack_closure(tuple_notifier_iterate_each, bound(tn), h)); } +closure_function(2, 2, boolean, tuple_notifier_cow_iterate_each, + tuple_notifier_cow, tn, binding_handler, h, + value, a, value, v) +{ + tuple_notifier_cow tn_cow = bound(tn); + tuple_notifier tn = &tn_cow->tn; + symbol s = sym_from_attribute(a); + get_value_notify n; + if (tn->get_notifys && (n = table_find(tn->get_notifys, s))) { + v = apply(n); + } else { + tn_cow_lock(tn_cow); + value v_copy; + if (tn_cow->parent_copy != INVALID_ADDRESS) { + v_copy = get(tn_cow->parent_copy, a); + if (v_copy) { + v = v_copy; + } + } else { + v_copy = 0; + } + if (!v_copy && is_composite(v)) { + if (!tn_init_copy(tn_cow)) + goto error; + v = tuple_notifier_wrap(v, true); + if (v == INVALID_ADDRESS) + goto error; + set(tn_cow->parent_copy, a, v); + } + tn_cow_unlock(tn_cow); + } + return apply(bound(h), s, v); + error: + tn_cow_unlock(tn_cow); + return false; +} + +closure_function(2, 2, boolean, tuple_notifier_iterate_copy_each, + value, parent, binding_handler, h, + value, a, value, v) +{ + if (get(bound(parent), a)) /* value has been handled in parent iterator */ + return true; + return apply(bound(h), sym_from_attribute(a), v); +} + +closure_function(1, 1, boolean, tuple_notifier_cow_iterate, + tuple_notifier_cow, tn, + binding_handler, h) +{ + tuple_notifier_cow tn = bound(tn); + value parent = tn->tn.parent; + if (!iterate(parent, stack_closure(tuple_notifier_cow_iterate_each, tn, h))) + return false; + if (tn->parent_copy == INVALID_ADDRESS) + return true; + return iterate(tn->parent_copy, stack_closure(tuple_notifier_iterate_copy_each, parent, h)); +} + closure_function(1, 0, value, tuple_notifier_wrapped, tuple_notifier, tn) { @@ -307,22 +449,36 @@ void tuple_notifier_register_set_notify(tuple_notifier tn, symbol s, set_value_n assert(tn->set_notifys != INVALID_ADDRESS); } table_set(tn->set_notifys, s, n); - value v = get(tn->parent, s); + value v = get(tn, s); if (v) apply(n, v); } -tuple_notifier tuple_notifier_wrap(tuple parent) +tuple_notifier tuple_notifier_wrap(value parent, boolean copy_on_write) { - tuple_notifier tn = allocate(management.fth, sizeof(struct tuple_notifier)); + tuple_notifier tn; + tuple_notifier_cow tn_cow; + if (copy_on_write) { + tn_cow = allocate(management.fth, sizeof(struct tuple_notifier_cow)); + tn = &tn_cow->tn; + } else { + tn = allocate(management.fth, sizeof(struct tuple_notifier)); + } if (tn == INVALID_ADDRESS) return tn; tn->parent = parent; tn->get_notifys = 0; tn->set_notifys = 0; - tn->f.g = closure(management.h, tuple_notifier_get, tn); - tn->f.s = closure(management.h, tuple_notifier_set, tn); - tn->f.i = closure(management.h, tuple_notifier_iterate, tn); + tn->f.g = copy_on_write ? closure(management.h, tuple_notifier_cow_get, tn_cow) : + closure(management.h, tuple_notifier_get, tn); + tn->f.s = copy_on_write ?
closure(management.h, tuple_notifier_cow_set, tn_cow) : + closure(management.h, tuple_notifier_set, tn); + tn->f.i = copy_on_write ? closure(management.h, tuple_notifier_cow_iterate, tn_cow) : + closure(management.h, tuple_notifier_iterate, tn); + if (copy_on_write) { + tn_cow->parent_copy = INVALID_ADDRESS; + spin_lock_init(&tn_cow->lock); + } /* The special /wrapped attribute is probed by print_value and friends. Since it's not in the parent tuple, it won't show up in an iterate; it's hidden. */ diff --git a/src/runtime/management.h b/src/runtime/management.h index a4d552871..49daacace 100644 --- a/src/runtime/management.h +++ b/src/runtime/management.h @@ -9,7 +9,7 @@ typedef struct tuple_notifier *tuple_notifier; typedef closure_type(set_value_notify, boolean, value); typedef closure_type(get_value_notify, value); -tuple_notifier tuple_notifier_wrap(tuple parent); +tuple_notifier tuple_notifier_wrap(value parent, boolean copy_on_write); void tuple_notifier_unwrap(tuple_notifier tn); void tuple_notifier_register_get_notify(tuple_notifier tn, symbol s, get_value_notify n); void tuple_notifier_register_set_notify(tuple_notifier tn, symbol s, set_value_notify n); diff --git a/src/runtime/memops.c b/src/runtime/memops.c index 1b8885e45..e22d4f3ab 100644 --- a/src/runtime/memops.c +++ b/src/runtime/memops.c @@ -204,3 +204,11 @@ int runtime_memcmp(const void *a, const void *b, bytes len) } return memcmp_8(a + len - end_len, p_long_b, end_len); } + +void *runtime_memchr(const void *a, int c, bytes len) +{ + for (const char *p = a; len > 0; p++, len--) + if (*p == c) + return (void *)p; + return 0; +} diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index e282fdd17..e8ba63ee2 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -87,6 +87,7 @@ void runtime_memcpy(void *a, const void *b, bytes len); void runtime_memset(u8 *a, u8 b, bytes len); int runtime_memcmp(const void *a, const void *b, bytes len); +void *runtime_memchr(const void *a, int c, bytes len); static inline int runtime_strlen(const char *a) { diff --git a/src/runtime/tuple.c b/src/runtime/tuple.c index 8d0258f13..09c336e5b 100644 --- a/src/runtime/tuple.c +++ b/src/runtime/tuple.c @@ -125,6 +125,18 @@ boolean iterate(value e, binding_handler h) } } +boolean is_composite(value v) +{ + switch (tagof(v)) { + case tag_table_tuple: + case tag_function_tuple: + case tag_vector: + return true; + default: + return false; + } +} + closure_function(1, 2, boolean, tuple_count_each, int *, count, value, s, value, v) @@ -227,7 +239,7 @@ closure_function(2, 2, boolean, destruct_value_each, value, v, boolean, recursive, value, s, value, v) { - if (is_tuple(v) || is_vector(v)) { + if (is_composite(v)) { if (bound(recursive)) destruct_value(v, true); } else if (v != null_value) { @@ -238,7 +250,7 @@ closure_function(2, 2, boolean, destruct_value_each, void destruct_value(value v, boolean recursive) { - if (is_tuple(v) || is_vector(v)) + if (is_composite(v)) iterate(v, stack_closure(destruct_value_each, v, recursive)); deallocate_value(v); } diff --git a/src/runtime/tuple.h b/src/runtime/tuple.h index e08c5d3e7..fb474b843 100644 --- a/src/runtime/tuple.h +++ b/src/runtime/tuple.h @@ -74,6 +74,8 @@ static inline boolean is_integer(value v) return tagof(v) == tag_integer; } +boolean is_composite(value v); + /* we're lax about typing here as these are sometimes used on alloca-wrapped buffers */ static inline boolean u64_from_value(value v, u64 *result) { diff --git a/src/unix/coredump.c b/src/unix/coredump.c index 
53cb26e94..34874ed8b 100644 --- a/src/unix/coredump.c +++ b/src/unix/coredump.c @@ -221,7 +221,7 @@ void coredump(thread t, struct siginfo *si, status_handler complete) return; } u64 doff = 0; - heap h = heap_general(get_kernel_heaps()); + heap h = heap_locked(get_kernel_heaps()); process p = t->p; status s = STATUS_OK; diff --git a/src/unix/syscall.c b/src/unix/syscall.c index fce89b9bb..c35993f7a 100755 --- a/src/unix/syscall.c +++ b/src/unix/syscall.c @@ -2753,18 +2753,20 @@ closure_function(0, 2, void, print_missing_files_cfn, rprintf("missing_files_end\n"); } -void init_syscalls(tuple root) +void init_syscalls(process p) { heap h = heap_locked(get_kernel_heaps()); syscall = syscall_handler; syscall_io_complete = closure(h, syscall_io_complete_cfn); io_completion_ignore = closure(h, io_complete_ignore); + filesystem fs = p->root_fs; vector hostname_v = split(h, alloca_wrap_cstring("etc/hostname"), '/'); - tuple hostname_t = resolve_path(root, hostname_v); + tuple hostname_t = resolve_path(filesystem_getroot(fs), hostname_v); split_dealloc(hostname_v); if (hostname_t) - filesystem_read_entire(get_root_fs(), hostname_t, h, + filesystem_read_entire(fs, hostname_t, h, closure(h, hostname_done), ignore_status); + tuple root = p->process_root; do_syscall_stats = get(root, sym(syscall_summary)) != 0; if (do_syscall_stats) { print_syscall_stats = closure(h, print_syscall_stats_cfn); @@ -2871,7 +2873,7 @@ closure_function(1, 1, boolean, notrace_notify, value, v) { notrace_configure(bound(p), false); - if (is_tuple(v) || is_vector(v)) + if (is_composite(v)) iterate(v, stack_closure(notrace_each, bound(p), true)); return true; } @@ -2881,7 +2883,7 @@ closure_function(1, 1, boolean, tracelist_notify, value, v) { notrace_configure(bound(p), true); - if (is_tuple(v) || is_vector(v)) + if (is_composite(v)) iterate(v, stack_closure(notrace_each, bound(p), false)); return true; } diff --git a/src/unix/unix.c b/src/unix/unix.c index 99686b62b..d7d674b9b 100644 --- a/src/unix/unix.c +++ b/src/unix/unix.c @@ -533,7 +533,7 @@ process create_process(unix_heaps uh, tuple root, filesystem fs) } filesystem_reserve(fs); /* because it hosts the current working directory */ p->root_fs = p->cwd_fs = fs; - p->cwd = fs->get_inode(fs, root); + p->cwd = fs->get_inode(fs, filesystem_getroot(fs)); p->process_root = root; p->fdallocator = create_id_heap(locked, locked, 0, infinity, 1, false); p->files = allocate_vector(locked, 64); @@ -658,7 +658,7 @@ process init_unix(kernel_heaps kh, tuple root, filesystem fs) ftrace_enable(); register_special_files(kernel_process); - init_syscalls(kernel_process->process_root); + init_syscalls(kernel_process); register_file_syscalls(linux_syscalls); #ifdef NET register_net_syscalls(linux_syscalls); diff --git a/src/unix/unix_internal.h b/src/unix/unix_internal.h index 4296a7aa2..b2301e087 100644 --- a/src/unix/unix_internal.h +++ b/src/unix/unix_internal.h @@ -1021,7 +1021,7 @@ static inline u64 iov_total_len(struct iovec *iov, int iovcnt) #define resolve_fd(__p, __fd) ({void *f ; if (!(f = fdesc_get(__p, __fd))) return set_syscall_error(current, EBADF); f;}) -void init_syscalls(tuple root); +void init_syscalls(process p); void init_threads(process p); void init_futices(process p); diff --git a/src/virtio/virtio.h b/src/virtio/virtio.h index 7039e7d61..f1031b262 100644 --- a/src/virtio/virtio.h +++ b/src/virtio/virtio.h @@ -6,5 +6,4 @@ void init_virtio_rng(kernel_heaps kh); void init_virtio_scsi(kernel_heaps kh, storage_attach a); void init_virtio_socket(kernel_heaps kh); -void 
virtio_mmio_parse(kernel_heaps kh, const char *str, int len); void virtio_mmio_enum_devs(kernel_heaps kh); diff --git a/src/virtio/virtio_mmio.c b/src/virtio/virtio_mmio.c index f53925193..87c0a5837 100644 --- a/src/virtio/virtio_mmio.c +++ b/src/virtio/virtio_mmio.c @@ -51,7 +51,9 @@ closure_function(1, 1, void, vtmmio_new_dev, list_push_back(&vtmmio_devices, &dev->l); } -void virtio_mmio_parse(kernel_heaps kh, const char *str, int len) +closure_function(1, 2, void, vtmmio_cmdline_parse, + kernel_heaps, kh, + const char *, str, int, len) { buffer b = alloca_wrap_buffer(str, len); int optname_len = buffer_strchr(b, '='); @@ -86,12 +88,13 @@ void virtio_mmio_parse(kernel_heaps kh, const char *str, int len) .memsize = memsize, .irq = irq, }; - apply(stack_closure(vtmmio_new_dev, kh), &adev); + apply(stack_closure(vtmmio_new_dev, bound(kh)), &adev); } } void virtio_mmio_enum_devs(kernel_heaps kh) { + cmdline_consume("virtio_mmio", stack_closure(vtmmio_cmdline_parse, kh)); acpi_get_vtmmio_devs(stack_closure(vtmmio_new_dev, kh)); } diff --git a/src/x86_64/kernel_machine.c b/src/x86_64/kernel_machine.c index 2a697762a..e9e5f955d 100644 --- a/src/x86_64/kernel_machine.c +++ b/src/x86_64/kernel_machine.c @@ -36,7 +36,7 @@ void interrupt_exit(void) lapic_eoi(); } -heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize) +heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize, boolean locking) { heap h = heap_locked(kh); heap p = (heap)heap_physical(kh); @@ -52,7 +52,9 @@ heap allocate_tagged_region(kernel_heaps kh, u64 tag, bytes pagesize) /* reserve area in virtual_huge */ assert(id_heap_set_area(heap_virtual_huge(kh), tag_base, tag_length, true, true)); - return allocate_mcache(h, backed, 5, find_order(pagesize) - 1, pagesize, false); + heap mc = allocate_mcache(h, backed, 5, find_order(pagesize) - 1, pagesize, false); + assert(mc != INVALID_ADDRESS); + return locking ? 
locking_heap_wrapper(h, mc) : mc; } void clone_frame_pstate(context_frame dest, context_frame src) diff --git a/src/x86_64/kernel_machine.h b/src/x86_64/kernel_machine.h index 88207b7f2..b0d3b6325 100644 --- a/src/x86_64/kernel_machine.h +++ b/src/x86_64/kernel_machine.h @@ -170,6 +170,9 @@ void install_gdt64_and_tss(void *tss_desc, void *tss, void *gdt, void *gdt_point #ifdef KERNEL /* locking constructs */ #include + +void cmdline_consume(const char *opt_name, cmdline_handler h); +void boot_params_apply(tuple t); #endif /* device mmio region access */ diff --git a/test/unit/id_heap_test.c b/test/unit/id_heap_test.c index a91e7580a..f409777a4 100644 --- a/test/unit/id_heap_test.c +++ b/test/unit/id_heap_test.c @@ -361,6 +361,27 @@ static boolean alloc_subrange_test(heap h) return true; } +static boolean alloc_align_test(heap h) +{ + const u64 unaligned_base = 0x12345; + const u64 heap_size = 0x20000; + id_heap id = create_id_heap(h, h, unaligned_base, heap_size, 1, false); + if (id == INVALID_ADDRESS) { + msg_err("cannot create heap\n"); + return false; + } + for (int size = 1; pad(unaligned_base, size) + size <= unaligned_base + heap_size; size <<= 1) { + u64 i = allocate_u64((heap)id, size); + if (i != pad(unaligned_base, size)) { + msg_err("unexpected allocation 0x%lx for size 0x%x\n", i, size); + return false; + } + deallocate_u64((heap)id, i, size); + } + destroy_heap((heap)id); + return true; +} + int main(int argc, char **argv) { heap h = init_process_runtime(); @@ -383,6 +404,9 @@ int main(int argc, char **argv) if (!alloc_subrange_test(h)) goto fail; + if (!alloc_align_test(h)) + goto fail; + msg_debug("test passed\n"); exit(EXIT_SUCCESS); fail: