Skip to content

Commit 118ed15

Browse files
Khaclaude
andauthored
refactor: make CompactedRegion serializable (#13984)
This PR reshapes `CompactedRegion` from a bare `USize` pointer into a structure whose `filePath`, `size`, `isMemoryMapped`, and `depPaths` are ordinary Lean values, with the logical base address, a root-relative buffer offset, and a pointer to the region's root object kept in private fields. Because the metadata is now plain Lean data and the root pointer is an ordinary pointer into the region rather than an opaque handle, a `CompactedRegion` (and thus `Environment`) can be embedded in compacted data: as long as the region is also supplied in `depRegions`, the object compactor relocates the pointer like any other cross-region reference and it is re-resolved against the freshly mapped buffer on load. The region's buffer is no longer owned by a C++ struct. The former `compacted_region` becomes `region_reader`, a transient non-owning read-context that fixes up a just-mapped buffer and is then discarded; cross-region dependencies are passed as a lightweight `region_view` value (begin/size/base_addr) rather than a pointer. The Lean `CompactedRegion` owns the mapping, framed by its whole file (base address, root-to-base offset, file size), and `CompactedRegion.free` releases it directly via `munmap`/`free`; `free` first neutralizes the root pointer so that decrementing any surviving reference to the freed structure does not dereference released memory. The former `lean_compacted_region_{size,is_memory_mapped}` accessors become structure projections, and `read` populates all fields at load time. Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ccfc89f commit 118ed15

5 files changed

Lines changed: 204 additions & 136 deletions

File tree

src/Lean/CompactedRegion.lean

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,43 @@ A compacted region holds multiple Lean objects in a contiguous memory region, wh
1616
read/written to/from disk. Objects inside the region do not have reference counters and cannot be
1717
freed individually. The contents of `.olean` files are compacted regions.
1818
-/
19-
@[expose] public def CompactedRegion := USize
20-
21-
@[extern "lean_compacted_region_is_memory_mapped"]
22-
public opaque CompactedRegion.isMemoryMapped : CompactedRegion → Bool
23-
24-
/-- Size in bytes. -/
25-
@[extern "lean_compacted_region_size"]
26-
public opaque CompactedRegion.size : CompactedRegion → USize
27-
28-
/--
29-
Frees a compacted region and its contents. No live references to the contents may exist at the
30-
time of invocation.
19+
/-
20+
This structure carries the region's loading metadata (`filePath`, `size`, ...) as plain Lean values
21+
plus a private `root` pointer to the region's root object. Because `root` is an ordinary pointer
22+
into the buffer rather than an opaque external handle, embedding a `CompactedRegion` in compacted
23+
data is sound as long as the region is supplied as one of the `depRegions`: the object compactor
24+
then relocates `root` like any other cross-region pointer, and it is re-resolved against the freshly
25+
mapped buffer on load.
3126
-/
32-
@[extern "lean_compacted_region_free"]
33-
public unsafe opaque CompactedRegion.free : CompactedRegion → IO Unit
27+
public structure CompactedRegion where
28+
/-- Path the region was loaded from. -/
29+
filePath : System.FilePath
30+
/-- Size in bytes of the region's mapping (the backing `.olean` file). -/
31+
size : USize
32+
/-- Whether the region's buffer is backed by a memory mapping of its file. -/
33+
isMemoryMapped : Bool
34+
/--
35+
Paths of the dep regions this region depends on, recorded at load time so a consumer can re-open
36+
them after a snapshot round-trip.
37+
-/
38+
depPaths : Array System.FilePath
39+
/--
40+
Logical base address the region's mapping is relative to (`olean_header.base_addr`), used to
41+
translate cross-region pointers when this region is used as a dependency.
42+
-/
43+
private baseAddr : USize
44+
/--
45+
Byte offset of the region's root object from the start of the mapping; `root - bufferOffset`
46+
recovers the mapping base (the address to unmap/free and the start of the dependency range).
47+
-/
48+
private bufferOffset : USize
49+
/--
50+
Pointer to the region's root object. A plain pointer into the data buffer (not an external handle),
51+
so the object compactor relocates it as a cross-region pointer when the region is embedded in
52+
compacted data with itself supplied as a `depRegion`.
53+
-/
54+
private root : NonScalar
55+
deriving Nonempty
3456

3557
opaque CompactorSpec : NonemptyType.{0}
3658
/--
@@ -44,6 +66,13 @@ subsequent saves will dereference dangling pointers.
4466
-/
4567
public def Compactor := CompactorSpec.type
4668

69+
/--
70+
Frees a compacted region and its contents. No live references to the contents may exist at the
71+
time of invocation.
72+
-/
73+
@[extern "lean_compacted_region_free"]
74+
public unsafe opaque CompactedRegion.free (region : CompactedRegion) : IO Unit
75+
4776
/--
4877
Saves arbitrary data to a compacted region on disk.
4978

src/library/module.cpp

Lines changed: 89 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ static void ensure_compactor_class() {
165165
}
166166
}
167167

168-
static lean_object * mk_compactor(void * base_addr, std::vector<compacted_region *> dep_regions,
168+
static lean_object * mk_compactor(void * base_addr, std::vector<region_view> dep_regions,
169169
bool allow_closures) {
170170
ensure_compactor_class();
171171
return lean_alloc_external(g_compactor_class,
@@ -176,14 +176,32 @@ static object_compactor * to_compactor(lean_object * o) {
176176
return static_cast<object_compactor *>(lean_get_external_data(o));
177177
}
178178

179-
// Extract `compacted_region *` pointers from an `Array CompactedRegion`.
180-
static std::vector<compacted_region *> extract_dep_regions(b_obj_arg odep_regions) {
181-
std::vector<compacted_region *> result;
179+
// Field layout of the Lean `CompactedRegion` structure (see `Lean/CompactedRegion.lean`). A region
180+
// is identified with its whole mapping: object fields 0 = `filePath`, 1 = `depPaths`, 2 = `root`
181+
// (a `NonScalar` pointer to the root object); usize slots 3..5 = `size` (mapping size), `baseAddr`
182+
// (logical mapping base), `bufferOffset` (`root - mappingBase`); a uint8 `isMemoryMapped` at byte
183+
// offset `sizeof(void*) * 6`.
184+
static char * region_root(b_obj_arg r) { return reinterpret_cast<char *>(lean_ctor_get(r, 2)); }
185+
static size_t region_size(b_obj_arg r) { return lean_ctor_get_usize(r, 3); }
186+
static size_t region_base_addr(b_obj_arg r) { return lean_ctor_get_usize(r, 4); }
187+
// `root` may legitimately sit below the mapping base (its object can be deduplicated into a
188+
// lower-addressed dep), making `bufferOffset` "negative"; recover the base in integer space so the
189+
// wrap is defined rather than UB pointer arithmetic.
190+
static char * region_buffer(b_obj_arg r) {
191+
return reinterpret_cast<char *>(reinterpret_cast<uintptr_t>(region_root(r)) - lean_ctor_get_usize(r, 5));
192+
}
193+
194+
// Extract address-range views from an `Array CompactedRegion` for cross-region pointer relocation.
195+
// The view spans the whole mapping (`[buffer, buffer + size)` at logical `baseAddr`); since pointers
196+
// only ever target objects in the data section, the header/trailer slack is harmless.
197+
static std::vector<region_view> extract_dep_regions(b_obj_arg odep_regions) {
198+
std::vector<region_view> result;
182199
size_t n = lean_array_size(odep_regions);
183200
result.reserve(n);
184201
for (size_t i = 0; i < n; i++) {
185-
result.push_back(reinterpret_cast<compacted_region *>(
186-
lean_unbox_usize(lean_array_get_core(odep_regions, i))));
202+
b_obj_arg r = lean_array_get_core(odep_regions, i);
203+
result.push_back({ region_buffer(r), region_size(r),
204+
reinterpret_cast<void *>(region_base_addr(r)) });
187205
}
188206
return result;
189207
}
@@ -274,7 +292,7 @@ extern "C" LEAN_EXPORT object * lean_compacted_region_save(b_obj_arg ofname, b_o
274292
// so reserve a bit of space for them (0x7fff...-0x7f00... = 1TB).
275293
base_addr = base_addr % 0x7f0000000000;
276294
base_addr = base_addr & ~(ALIGN - 1);
277-
std::vector<compacted_region *> dep_regions = extract_dep_regions(odep_regions);
295+
std::vector<region_view> dep_regions = extract_dep_regions(odep_regions);
278296
cs_obj = object_ref(mk_compactor(reinterpret_cast<void *>(base_addr),
279297
std::move(dep_regions), allow_closures));
280298
}
@@ -412,6 +430,29 @@ extern "C" LEAN_EXPORT object * lean_compacted_region_save(b_obj_arg ofname, b_o
412430
return io_result_mk_ok(cs_obj.steal());
413431
}
414432

433+
static object * mk_compacted_region(b_obj_arg ofname, b_obj_arg odep_regions, object * root,
434+
char * buffer, size_t base_addr, size_t full_sz, bool is_mmap) {
435+
size_t n = lean_array_size(odep_regions);
436+
object * dep_paths = lean_alloc_array(n, n);
437+
for (size_t i = 0; i < n; i++) {
438+
object * dep_file_path = lean_ctor_get(lean_array_get_core(odep_regions, i), 0);
439+
lean_inc(dep_file_path);
440+
lean_array_set_core(dep_paths, i, dep_file_path);
441+
}
442+
object * r = lean_alloc_ctor(0, 3, sizeof(size_t) * 3 + 1);
443+
lean_inc(ofname);
444+
lean_ctor_set(r, 0, ofname);
445+
lean_ctor_set(r, 1, dep_paths);
446+
lean_ctor_set(r, 2, root);
447+
lean_ctor_set_usize(r, 3, full_sz);
448+
lean_ctor_set_usize(r, 4, base_addr);
449+
// Integer subtraction: `root` may be below `buffer` (see `region_buffer`), and the two can be
450+
// unrelated allocations, so a pointer subtraction would be UB.
451+
lean_ctor_set_usize(r, 5, reinterpret_cast<uintptr_t>(root) - reinterpret_cast<uintptr_t>(buffer));
452+
lean_ctor_set_uint8(r, sizeof(void*) * 6, is_mmap ? 1 : 0);
453+
return r;
454+
}
455+
415456
// Implements `Lean.CompactedRegion.read`. Loads a compacted region from disk. `odep_regions`
416457
// carries `CompactedRegion`s whose address ranges must be known to resolve cross-region pointers
417458
// in this file. Returns `(α × CompactedRegion)`, where `α` is the type the Lean caller asks the
@@ -420,8 +461,8 @@ extern "C" LEAN_EXPORT object * lean_compacted_region_save(b_obj_arg ofname, b_o
420461
// Supports both `v2` and `v3` formats.
421462
extern "C" LEAN_EXPORT object * lean_compacted_region_read(b_obj_arg ofname, b_obj_arg odep_regions, object *) {
422463
std::string olean_fn(lean_string_cstr(ofname));
423-
std::vector<compacted_region *> dep_regions = extract_dep_regions(odep_regions);
424464
try {
465+
std::vector<region_view> dep_regions = extract_dep_regions(odep_regions);
425466
#ifdef LEAN_WINDOWS
426467
HANDLE h_file = CreateFile(olean_fn.c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
427468
if (h_file == INVALID_HANDLE_VALUE) {
@@ -467,9 +508,8 @@ extern "C" LEAN_EXPORT object * lean_compacted_region_read(b_obj_arg ofname, b_o
467508

468509
char * buffer = nullptr;
469510
bool is_mmap = false;
470-
std::function<void()> free_data;
471511

472-
// Map file COW-writable. The fallback walk in `compacted_region::read()` writes
512+
// Map file COW-writable. The fallback walk in `region_reader::read()` writes
473513
// fixed pointers back into the region's memory when any dep region didn't land at its
474514
// saved `base_addr`, so the mapping must be writable. `v3` files additionally patch
475515
// closure fn pointers in place. Unwritten pages remain shared with the page cache under
@@ -485,7 +525,6 @@ extern "C" LEAN_EXPORT object * lean_compacted_region_read(b_obj_arg ofname, b_o
485525
lean_always_assert(CloseHandle(h_map));
486526
if (buffer && buffer == base_addr) {
487527
is_mmap = true;
488-
free_data = [=]() { lean_always_assert(UnmapViewOfFile(base_addr)); };
489528
} else if (buffer) {
490529
lean_always_assert(UnmapViewOfFile(buffer));
491530
buffer = nullptr;
@@ -505,7 +544,6 @@ extern "C" LEAN_EXPORT object * lean_compacted_region_read(b_obj_arg ofname, b_o
505544
mmap_flags, fd.get(), 0));
506545
if (buffer != MAP_FAILED && buffer == base_addr) {
507546
is_mmap = true;
508-
free_data = [=]() { lean_always_assert(munmap(buffer, size) == 0); };
509547
} else {
510548
if (buffer != MAP_FAILED) munmap(buffer, size);
511549
buffer = nullptr;
@@ -521,9 +559,16 @@ extern "C" LEAN_EXPORT object * lean_compacted_region_read(b_obj_arg ofname, b_o
521559
free(buffer);
522560
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "'").str());
523561
}
524-
char * buf = buffer;
525-
size_t sz = size;
526-
free_data = [=]() { free_sized(buf, sz); };
562+
// The buffer outlives this call (its lifetime is the Lean `CompactedRegion`'s, freed by
563+
// `lean_compacted_region_free`). It is only reached through interior pointers (the
564+
// region's `root` and the objects read out of it), and a persistent/`leakEnv` region is
565+
// never freed at all, so tell LeakSanitizer not to report it. Under `mmap` (the common
566+
// path) the buffer is not a heap allocation and needs no such treatment.
567+
#if defined(__has_feature)
568+
#if __has_feature(address_sanitizer)
569+
__lsan_ignore_object(buffer);
570+
#endif
571+
#endif
527572
}
528573

529574
// v3 format, default data otherwise
@@ -552,24 +597,44 @@ extern "C" LEAN_EXPORT object * lean_compacted_region_read(b_obj_arg ofname, b_o
552597
}
553598
}
554599

555-
compacted_region * region = new compacted_region(
600+
region_reader reader(
556601
data_section_sz, buffer + data_section_off,
557-
base_addr + data_section_off, is_mmap, free_data,
602+
base_addr + data_section_off,
558603
std::move(dep_regions),
559604
std::move(lib_relocs), std::move(closure_offsets));
560-
#if defined(__has_feature)
561-
#if __has_feature(address_sanitizer)
562-
__lsan_ignore_object(region);
563-
#endif
564-
#endif
565-
object * mod = region->read();
605+
object * mod = reader.read();
566606
object * pair = alloc_cnstr(0, 2, 0);
567607
cnstr_set(pair, 0, mod);
568-
cnstr_set(pair, 1, box_size_t(reinterpret_cast<size_t>(region)));
608+
// The Lean region is framed by its whole mapping (`buffer`, `base_addr` = the mapped-at
609+
// logical address, `size` = the file size), not the inner data section.
610+
cnstr_set(pair, 1, mk_compacted_region(ofname, odep_regions, mod,
611+
buffer, reinterpret_cast<size_t>(base_addr), size, is_mmap));
569612
return io_result_mk_ok(pair);
570613
} catch (exception & ex) {
571614
return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str());
572615
}
573616
}
574617

618+
extern "C" LEAN_EXPORT obj_res lean_compacted_region_free(obj_arg region, object *) {
619+
char * buffer = region_buffer(region);
620+
size_t full_sz = region_size(region);
621+
bool is_mmap = lean_ctor_get_uint8(region, sizeof(void*) * 6) != 0;
622+
// `root` points into the buffer we are about to release. Overwrite it with a scalar so that
623+
// decrementing this structure now, or any reference to it that outlives the `free` (e.g. one
624+
// still sitting in `env.header.regions`), does not dereference freed memory. The old value is a
625+
// refcount-free buffer object, so it needs no decrement.
626+
lean_ctor_set(region, 2, lean_box(0));
627+
lean_dec_ref(region);
628+
if (is_mmap) {
629+
#ifdef LEAN_WINDOWS
630+
lean_always_assert(UnmapViewOfFile(buffer));
631+
#else
632+
lean_always_assert(munmap(buffer, full_sz) == 0);
633+
#endif
634+
} else {
635+
free_sized(buffer, full_sz);
636+
}
637+
return lean_io_result_mk_ok(lean_box(0));
638+
}
639+
575640
}

0 commit comments

Comments
 (0)