stage2 and low memory cleanup (#959)
* alteration to the id heap interface to allow allocating within a specified range (id_heap_alloc_subrange()); move wrap-around alloc for random and next-fit allocations into the id heap and out of the bitmap (see the subrange sketch after this list)

* stage2 cleanup: set up heaps and page tables before loading the kernel elf, allocate a much larger working heap off of the physical region heap (which will be unmapped and released in stage3) to accommodate extensive tfs meta, clean up various cruft and aggregate memory-related defines into uniboot.h

* mapping the allocated stage2 working heap wasn't necessary; return it as well as any trimmed phys mem above the kernel map to the pool of physical memory for stage3

* correct filesystem sizes throughout (size of volume if fixed, infinity otherwise)

* shrink early working heap area to 1K, move from extended mem to bss; allocate expanded area before switching to new stack

* communicate the complete extent of the identity-mapped page table region from stage2 to stage3 and ultimately unix mmap setup so that it's properly protected from user mappings; open up high 2gb of 32-bit space for MMAP_32BIT mappings

* add unit test for id_heap_alloc_subrange(); fix incorrect handling of start and end boundaries in bitmap_alloc_internal; change byte ranges to page ranges in id heap

* mmap: use p->virtual32 to track the entirety of 32-bit address space, change localized closure uses to stack closures; fix erroneous process virtual area which overlapped tagged region, reserve tagged regions (kernel and user) in virtual_huge

* stage2: reduce wasted address space by reordering physical allocations such that only the identity map remains below the kernel, stash stage2 stack as region to be unmapped/reclaimed in stage3; exec: clean up and move address space constants to unix_internal.h, clean up ASLR computation, compute stack ASLR manually rather than using randomized heap which would force stack length sized alignment
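Note on the subrange change: id_heap_alloc_subrange() comes down to clamping the bitmap search window to the caller's [start, end) bounds before scanning for a free run. Below is a minimal standalone C sketch of that idea — it is not the project's bitmap code, and every name in it is illustrative.

#include <stdint.h>

#define INVALID_ID ((uint64_t)-1)

/* find 'count' consecutive clear bits within [start, end); returns the
   first id of the run, or INVALID_ID if no fit exists */
static uint64_t bitmap_alloc_subrange(uint64_t *bits, uint64_t nbits,
                                      uint64_t count, uint64_t start, uint64_t end)
{
    if (end > nbits)
        end = nbits;                /* clamp end to the bitmap length */
    if (count == 0 || start >= end || count > end - start)
        return INVALID_ID;          /* empty or undersized window */
    uint64_t run = 0, first = start;
    for (uint64_t i = start; i < end; i++) {
        if (bits[i / 64] & (1ull << (i % 64))) {
            run = 0;                /* restart the run past an allocated bit */
            first = i + 1;
        } else if (++run == count) {
            for (uint64_t j = first; j <= i; j++)
                bits[j / 64] |= 1ull << (j % 64);   /* mark the run allocated */
            return first;
        }
    }
    return INVALID_ID;
}

Getting the two clamps right up front is the sort of start/end boundary handling the bitmap_alloc_internal fix above refers to.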
wjhun committed Aug 20, 2019
1 parent 595d568 commit 99f3c18
Showing 15 changed files with 300 additions and 196 deletions.
141 changes: 86 additions & 55 deletions boot/stage2.c
@@ -25,37 +25,18 @@ extern void run64(u32 entry);
  * 0x7e00..0x7fff - unused
  * 0x8000.. - stage2 code
  */
 
+#define EARLY_WORKING_SIZE KB
+#define STACKLEN (8 * PAGESIZE)
+
 #define REAL_MODE_STACK_SIZE 0x1000
+#define SCRATCH_BASE 0x500
+#define BOOT_BASE 0x7c00
+#define SCRATCH_LEN (BOOT_BASE - REAL_MODE_STACK_SIZE)
 
-/* We're placing the working heap base at the beginning of extended
-   memory. Use of this heap is tossed out in the move to stage3, thus
-   no mapping set up for it.
-   XXX grub support: Figure out how to probe areas used by grub modules, etc.
-   XXX can check e820 regions
-*/
-#define WORKING_BASE 0x100000
-#define WORKING_LEN (4*MB) /* arbitrary, must be enough for any fs meta */
-static u64 working = WORKING_BASE;
-
-#define STACKLEN (8 * PAGESIZE)
-static struct heap workings;
-static struct kernel_heaps kh;
-static u32 stack;
-#define SCRATCH_BASE 0x500
-#define BOOT_BASE 0x7c00
-#define SCRATCH_LEN (BOOT_BASE - REAL_MODE_STACK_SIZE)
-
-// xxx - should have a general wrapper/analysis thingly
-static u64 stage2_allocator(heap h, bytes b)
-{
-    // tag requires 4 byte aligned addresses
-    u64 result = working;
-    working += pad(b, 4);
-    if (working > (WORKING_BASE + WORKING_LEN))
-        halt("stage2 working heap out of memory\n");
-    return result;
-}
+static struct kernel_heaps kh;
+static u32 stack_base;
+static u32 identity_base;
 
 static u64 s[2] = { 0xa5a5beefa5a5cafe, 0xbeef55aaface55aa };

@@ -151,36 +132,46 @@ void fail(status s)
     halt("filesystem_read_entire failed: %v\n", s);
 }
 
-static CLOSURE_0_1(kernel_read_complete, void, buffer);
-static void __attribute__((noinline)) kernel_read_complete(buffer kb)
+static void setup_page_tables()
 {
     stage2_debug("%s\n", __func__);
-    heap physical = heap_physical(&kh);
-    heap working = heap_general(&kh);
 
-    // should be the intersection of the empty physical and virtual
-    // up to some limit, 2M aligned
-    u64 identity_length = 0x300000;
-    u64 pmem = allocate_u64(physical, identity_length);
-    heap pages = region_allocator(working, PAGESIZE, REGION_IDENTITY);
+    /* identity heap alloc */
+    stage2_debug("identity heap at [0x%x, 0x%x)\n", identity_base, identity_base + IDENTITY_HEAP_SIZE);
+    create_region(identity_base, IDENTITY_HEAP_SIZE, REGION_IDENTITY);
+    create_region(identity_base, IDENTITY_HEAP_SIZE, REGION_IDENTITY_RESERVED);
+    heap pages = region_allocator(heap_general(&kh), PAGESIZE, REGION_IDENTITY);
     kh.pages = pages;
-    create_region(pmem, identity_length, REGION_IDENTITY);
+
     /* page table setup */
     void *vmbase = allocate_zero(pages, PAGESIZE);
     mov_to_cr("cr3", vmbase);
-    map(pmem, pmem, identity_length, PAGE_WRITABLE | PAGE_PRESENT, pages);
-    // going to some trouble to set this up here, but its barely
-    // used in stage3
-    stack -= (STACKLEN - 4);    /* XXX b0rk b0rk b0rk */
-    map(stack, stack, (u64)STACKLEN, PAGE_WRITABLE, pages);
+
+    /* initial map, page tables and stack */
+    map(0, 0, INITIAL_MAP_SIZE, PAGE_WRITABLE | PAGE_PRESENT, pages);
+    map(identity_base, identity_base, IDENTITY_HEAP_SIZE, PAGE_WRITABLE | PAGE_PRESENT, pages);
+    map(stack_base, stack_base, (u64)STACKLEN, PAGE_WRITABLE, pages);
+}
 
-// stash away kernel elf image for use in stage3
+static u64 working_saved_base;
+
+static CLOSURE_0_1(kernel_read_complete, void, buffer);
+static void __attribute__((noinline)) kernel_read_complete(buffer kb)
+{
+    stage2_debug("%s\n", __func__);
+
+    /* save kernel elf image for use in stage3 (for symbol data) */
     create_region(u64_from_pointer(buffer_ref(kb, 0)), pad(buffer_length(kb), PAGESIZE), REGION_KERNIMAGE);
 
-    void *k = load_elf(kb, 0, pages, physical, false);
+    void *k = load_elf(kb, 0, heap_pages(&kh), heap_physical(&kh), false);
     if (!k) {
         halt("kernel elf parse failed\n");
     }
 
+    /* tell stage3 that pages from the stage2 working heap can be reclaimed */
+    assert(working_saved_base);
+    create_region(working_saved_base, STAGE2_WORKING_HEAP_SIZE, REGION_PHYSICAL);
+
     run64(u64_from_pointer(k));
 }

@@ -241,9 +232,12 @@ void newstack()
     heap h = heap_general(&kh);
     heap physical = heap_physical(&kh);
     buffer_handler bh = closure(h, kernel_read_complete);
 
+    setup_page_tables();
+
     create_filesystem(h,
                       SECTOR_SIZE,
-                      1024 * MB, /* XXX change to infinity with new rtrie */
+                      infinity,
                       0, /* ignored in boot */
                       get_stage2_disk_read(h, fs_offset),
                       closure(h, stage2_empty_write),
@@ -253,12 +247,34 @@
     halt("kernel failed to execute\n");
 }
 
+// consider passing region area as argument to disperse magic
+static struct heap working_heap;
+static u8 early_working[EARLY_WORKING_SIZE] __attribute__((aligned(8)));
+static u64 working_p;
+static u64 working_end;
+
+static u64 stage2_allocator(heap h, bytes b)
+{
+    if (working_p + b > working_end)
+        halt("stage2 working heap out of memory\n");
+    u64 result = working_p;
+    working_p += pad(b, 4);     /* tags require alignment */
+#ifdef DEBUG_STAGE2_ALLOC
+    console("stage2 alloc ");
+    print_u64(result);
+    console(", ");
+    print_u64(working_p);
+    console("\n");
+#endif
+    return result;
+}
+
 void centry()
 {
-    workings.alloc = stage2_allocator;
-    workings.dealloc = leak;
-    kh.general = &workings;
+    working_heap.alloc = stage2_allocator;
+    working_heap.dealloc = leak;
+    working_p = u64_from_pointer(early_working);
+    working_end = working_p + EARLY_WORKING_SIZE;
+    kh.general = &working_heap;
     init_runtime(&kh); /* we know only general is used */
     init_extra_prints();
     stage2_debug("%s\n", __func__);
@@ -300,10 +316,25 @@
         }
     }
 
-    kh.physical = region_allocator(&workings, PAGESIZE, REGION_PHYSICAL);
+    kh.physical = region_allocator(&working_heap, PAGESIZE, REGION_PHYSICAL);
     assert(kh.physical);
-
-    stack = allocate_u64(kh.physical, STACKLEN) + STACKLEN - 4;
-    asm("mov %0, %%esp": :"g"(stack));
+
+    /* allocate identity region for page tables */
+    identity_base = allocate_u64(kh.physical, IDENTITY_HEAP_SIZE);
+    assert(identity_base != INVALID_PHYSICAL);
+
+    /* allocate stage2 (and early stage3) stack */
+    stack_base = allocate_u64(kh.physical, STACKLEN);
+    assert(stack_base != INVALID_PHYSICAL);
+    create_region(stack_base, STACKLEN, REGION_RECLAIM);
+
+    /* allocate larger space for stage2 working (to accomodate tfs meta, etc.) */
+    working_p = allocate_u64(kh.physical, STAGE2_WORKING_HEAP_SIZE);
+    assert(working_p != INVALID_PHYSICAL);
+    working_saved_base = working_p;
+    working_end = working_p + STAGE2_WORKING_HEAP_SIZE;
+
+    u32 stacktop = stack_base + STACKLEN - 4;
+    asm("mov %0, %%esp": :"g"(stacktop));
     newstack();
 }
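The working heap change above, reduced to a freestanding C sketch: a tiny static buffer in .bss satisfies allocations until the physical region allocator exists, after which the same bump pointer is retargeted at a much larger physical range. This is illustrative code, not the stage2 source; bump_alloc, early_init and expand_working are invented names.

#include <stdint.h>
#include <stdlib.h>

#define EARLY_WORKING_SIZE 1024     /* 1K early heap, as in the commit */

static uint8_t early_working[EARLY_WORKING_SIZE] __attribute__((aligned(8)));
static uint64_t working_p, working_end;

static uint64_t bump_alloc(uint64_t bytes)
{
    if (working_p + bytes > working_end)
        abort();                    /* out of working heap; stage2 halts here */
    uint64_t result = working_p;
    working_p += (bytes + 3) & ~3ull;   /* keep 4-byte alignment for tags */
    return result;
}

/* phase 1: bootstrap off the static buffer in .bss */
static void early_init(void)
{
    working_p = (uint64_t)(uintptr_t)early_working;
    working_end = working_p + EARLY_WORKING_SIZE;
}

/* phase 2: retarget the allocator at a large physical allocation */
static void expand_working(uint64_t base, uint64_t len)
{
    working_p = base;
    working_end = base + len;
}

The ordering in centry() above is the point: the expanded area is carved out of kh.physical and its base stashed in working_saved_base, so kernel_read_complete() can hand the whole window back to stage3 as REGION_PHYSICAL.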
2 changes: 1 addition & 1 deletion mkfs/dump.c
@@ -135,7 +135,7 @@ int main(int argc, char **argv)
     tuple root = allocate_tuple();
     create_filesystem(h,
                       SECTOR_SIZE,
-                      10ull * 1024 * 1024 * 1024,
+                      infinity,
                       h,
                       closure(h, bread, fd, get_fs_offset(fd)),
                       closure(h, bwrite, fd),
3 changes: 1 addition & 2 deletions mkfs/mkfs.c
@@ -336,7 +336,6 @@ int main(int argc, char **argv)

     heap h = init_process_runtime();
     descriptor out = open(image_path, O_CREAT|O_RDWR, 0644);
-    u64 fs_size = 100ull * MB; /* XXX temp, change to infinity after rtrie/bitmap fix */
     if (out < 0) {
         halt("couldn't open output file %s: %s\n", image_path, strerror(errno));
     }
@@ -374,7 +373,7 @@
     // fixing the size doesn't make sense in this context?
     create_filesystem(h,
                       SECTOR_SIZE,
-                      fs_size,
+                      infinity,
                       h,
                       closure(h, bread, out),
                       closure(h, bwrite, out, offset),
5 changes: 5 additions & 0 deletions src/runtime/kernel_heaps.h
@@ -14,6 +14,11 @@ typedef struct kernel_heaps {
        initialization. */
     heap pages;
 
+    /* Used by unix/mmap.c, these define the address space that is
+       blocked out by the identity heap. */
+    u64 identity_reserved_start;
+    u64 identity_reserved_end;
+
     /* Allocations of physical address space outside of pages are made
        from the physical id heap. */
     heap physical;
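A hedged sketch of how a consumer such as unix/mmap.c might use the two new fields to keep user mappings off the identity heap; only the struct fields come from this commit, the helper itself is hypothetical.

/* half-open interval overlap test against
   [identity_reserved_start, identity_reserved_end) */
static boolean overlaps_identity_reserved(struct kernel_heaps *kh, u64 vaddr, u64 len)
{
    return vaddr < kh->identity_reserved_end &&
        vaddr + len > kh->identity_reserved_start;
}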
6 changes: 6 additions & 0 deletions src/runtime/range.h
@@ -105,3 +105,9 @@ static inline boolean range_valid(range r)
 {
     return r.start <= r.end;
 }
+
+static inline void range_add(range *r, s64 delta)
+{
+    r->start += delta;
+    r->end += delta;
+}
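A quick usage sketch for the new helper, with made-up values and assuming the existing irange() constructor:

range r = irange(0x100000, 0x300000);   /* [1MB, 3MB) */
range_add(&r, 0x1000);                  /* shift the whole range up one page */
/* r is now [0x101000, 0x301000) */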
27 changes: 21 additions & 6 deletions src/runtime/uniboot.h
@@ -3,10 +3,10 @@
 #include <predef.h>
 
 #ifdef BOOT
-
 #include <def32.h>
+/* Keep this reasonable so we don't blow the stage2 working heap. */
+#define TABLE_MAX_BUCKETS 1024
-#else
+
+#else /* BOOT */
 
 #include <def64.h>
 #define user_va_tag_offset 44
@@ -16,9 +16,6 @@
 #define va_tag_offset user_va_tag_offset
 #endif
 
-/* maximum buckets that can fit within a PAGESIZE_2M mcache */
-#define TABLE_MAX_BUCKETS 131072
-
 static inline void* tag(void* v, u64 tval) {
     return pointer_from_u64((tval << va_tag_offset) | u64_from_pointer(v));
 }
@@ -31,4 +28,22 @@ static inline u16 tagof(void* v) {

 #endif /* BOOT */
 
+/* needed for physical region allocator, before we ever look at the
+   elf - be sure that this matches the stage3 linker script
+   (TODO: build time assert) */
+#define KERNEL_RESERVE_START 0x7f000000
+#define KERNEL_RESERVE_END 0x80000000
+
+/* identity-mapped space for page tables - we can shrink this if we
+   ever make the page table code aware of mappings (e.g. virt_from_phys) */
+#define IDENTITY_HEAP_SIZE (128 * MB)
+
+/* the stage2 secondary working heap - this needs to be large enough
+   to accomodate all tfs allocations when loading the kernel - it gets
+   recycled in stage3, so be generous */
+#define STAGE2_WORKING_HEAP_SIZE (128 * MB)
+
+/* maximum buckets that can fit within a PAGESIZE_2M mcache */
+#define TABLE_MAX_BUCKETS 131072
+
 #include <x86.h>
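One possible discharge of the "TODO: build time assert" above, sketched under the assumption that the stage3 linker script can export symbols for the kernel load range; the two extern symbol names are hypothetical, not part of this commit.

extern char kernel_load_start[], kernel_load_end[];  /* from the linker script */

_Static_assert(KERNEL_RESERVE_START < KERNEL_RESERVE_END,
               "kernel reserve range inverted");
_Static_assert((KERNEL_RESERVE_START & (2 * MB - 1)) == 0,
               "kernel reserve start not 2MB aligned");

static inline void check_kernel_reserve(void)
{
    /* runtime check against the addresses the linker actually chose */
    assert(u64_from_pointer(kernel_load_start) >= KERNEL_RESERVE_START &&
           u64_from_pointer(kernel_load_end) <= KERNEL_RESERVE_END);
}

A link-time or compile-time check beats a runtime halt here, since a mismatch between these defines and the linker script is a build configuration error.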
2 changes: 1 addition & 1 deletion src/tfs/tfs.c
@@ -976,7 +976,7 @@ void create_filesystem(heap h,
     fs->alignment = alignment;
     fs->blocksize = SECTOR_SIZE;
 #ifndef BOOT
-    fs->storage = create_id_heap(h, 0, infinity, SECTOR_SIZE);
+    fs->storage = create_id_heap(h, 0, size, SECTOR_SIZE);
     assert(fs->storage != INVALID_ADDRESS);
     assert(id_heap_set_area(fs->storage, 0, INITIAL_LOG_SIZE, true, true));
 #endif
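The effect of the tfs change in isolation, as an illustrative fragment (not from the commit): with fs->storage bounded by the volume size rather than infinity, asking for more space than the device has fails at allocation time.

u64 b = allocate_u64(fs->storage, size + SECTOR_SIZE);  /* more than the volume holds */
assert(b == INVALID_PHYSICAL);      /* fail fast instead of writing past the volume */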