From 50e838e3fbc8a738f6ff1ac883cab35b07ad7255 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 28 Aug 2025 10:28:49 -0400 Subject: [PATCH 1/5] midx-write: only load initialized packs The fill_packs_from_midx() method was refactored in fcb2205b77 (midx: implement support for writing incremental MIDX chains, 2024-08-06) to allow for preferred packfiles and incremental multi-pack-indexes. However, this led to some conditions that can cause improperly initialized memory in the context's list of packfiles. The conditions caring about the preferred pack name or the incremental flag are currently necessary to load a packfile. But the context is still being populated with pack_info structs based on the packfile array for the existing multi-pack-index even if prepare_midx_pack() isn't called. Add a new test that breaks under --stress when compiled with SANITIZE=address. The chosen number of 100 packfiles was selected to get the --stress output to fail about 50% of the time, while 50 packfiles could not get a failure in most --stress runs. This test has a very minor check at the end confirming only one packfile remaining. The failing nature of this test actually relies on auto-GC cleaning up some packfiles during the creation of the commits, as tests setting gc.auto to zero make the packfile count match the number of added commits but also avoids hitting the memory issue. The test case is marked as EXPENSIVE not only because of the number of packfiles it creates, but because some CI environments were reporting errors during the test that I could not reproduce, specifically around being unable to open the packfiles or their pack-indexes. When it fails under SANITIZE=address, it provides the following error: AddressSanitizer:DEADLYSIGNAL ================================================================= ==3263517==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000027 ==3263517==The signal is caused by a READ memory access. ==3263517==Hint: address points to the zero page. #0 0x562d5d82d1fb in close_pack_windows packfile.c:299 #1 0x562d5d82d3ab in close_pack packfile.c:354 #2 0x562d5d7bfdb4 in write_midx_internal midx-write.c:1490 #3 0x562d5d7c7aec in midx_repack midx-write.c:1795 #4 0x562d5d46fff6 in cmd_multi_pack_index builtin/multi-pack-index.c:305 ... This failure stack trace is disconnected from the real fix because it the bad pointers are accessed later when closing the packfiles from the context. There are a few different aspects to this fix that are worth noting: 1. We return to the previous behavior of fill_packs_from_midx to not rely on the incremental flag or existence of a preferred pack. 2. The behavior to scan all layers of an incremental midx is kept, so this is not a full revert of the change. 3. We skip allocating more room in the pack_info array if the pack fails prepare_midx_pack(). 4. The method has always returned 0 for success and 1 for failure, but the condition checking for error added a check for a negative result for failure, so that is now updated. 5. The call to open_pack_index() is removed, but this is needed later in the case of a preferred pack. That call is moved to immediately before its result is needed (checking for the object count). Signed-off-by: Derrick Stolee --- midx-write.c | 38 ++++++++++++------------------------- t/t5319-multi-pack-index.sh | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/midx-write.c b/midx-write.c index a0aceab5e0d1ac..d8f9679868defa 100644 --- a/midx-write.c +++ b/midx-write.c @@ -920,8 +920,7 @@ static struct multi_pack_index *lookup_multi_pack_index(struct repository *r, return get_multi_pack_index(source); } -static int fill_packs_from_midx(struct write_midx_context *ctx, - const char *preferred_pack_name, uint32_t flags) +static int fill_packs_from_midx(struct write_midx_context *ctx) { struct multi_pack_index *m; @@ -929,30 +928,13 @@ static int fill_packs_from_midx(struct write_midx_context *ctx, uint32_t i; for (i = 0; i < m->num_packs; i++) { - ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - - /* - * If generating a reverse index, need to have - * packed_git's loaded to compare their - * mtimes and object count. - * - * If a preferred pack is specified, need to - * have packed_git's loaded to ensure the chosen - * preferred pack has a non-zero object count. - */ - if (flags & MIDX_WRITE_REV_INDEX || - preferred_pack_name) { - if (prepare_midx_pack(ctx->repo, m, - m->num_packs_in_base + i)) { - error(_("could not load pack")); - return 1; - } - - if (open_pack_index(m->packs[i])) - die(_("could not open index for %s"), - m->packs[i]->pack_name); + if (prepare_midx_pack(ctx->repo, m, + m->num_packs_in_base + i)) { + error(_("could not load pack")); + return 1; } + ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); fill_pack_info(&ctx->info[ctx->nr++], m->packs[i], m->pack_names[i], m->num_packs_in_base + i); @@ -1123,8 +1105,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, ctx.num_multi_pack_indexes_before++; m = m->base_midx; } - } else if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name, - flags) < 0) { + } else if (ctx.m && fill_packs_from_midx(&ctx)) { goto cleanup; } @@ -1223,6 +1204,11 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (ctx.preferred_pack_idx > -1) { struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p; + + if (open_pack_index(preferred)) + die(_("failed to open preferred pack %s"), + ctx.info[ctx.preferred_pack_idx].pack_name); + if (!preferred->num_objects) { error(_("cannot select preferred pack %s with no objects"), preferred->pack_name); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index bd75dea9501ed7..49705c62a25202 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -989,6 +989,23 @@ test_expect_success 'repack --batch-size=0 repacks everything' ' ) ' +test_expect_success EXPENSIVE 'repack/expire with many packs' ' + cp -r dup many && + ( + cd many && + + for i in $(test_seq 1 100) + do + test_commit extra$i && + git maintenance run --task=loose-objects || return 1 + done && + + git multi-pack-index write && + git multi-pack-index repack && + git multi-pack-index expire + ) +' + test_expect_success 'repack --batch-size= repacks everything' ' ( cd dup2 && From 474f647f348a1c3e0eacece67c130724ca2edb56 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 25 Aug 2025 14:18:06 -0400 Subject: [PATCH 2/5] midx-write: put failing response value back This instance of setting the result to 1 before going to cleanup was accidentally removed in fcb2205b77 (midx: implement support for writing incremental MIDX chains, 2024-08-06). Signed-off-by: Derrick Stolee --- midx-write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/midx-write.c b/midx-write.c index d8f9679868defa..85b2d471efc7b5 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1106,6 +1106,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, m = m->base_midx; } } else if (ctx.m && fill_packs_from_midx(&ctx)) { + result = 1; goto cleanup; } From 06429f5f6fa02bdf688c388ad4ed01b95415d752 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 25 Aug 2025 13:27:31 -0400 Subject: [PATCH 3/5] midx-write: use cleanup when incremental midx fails The incremental mode of writing a multi-pack-index has a few extra conditions that could lead to failure, but these are currently short-ciruiting with 'return -1' instead of setting the method's 'result' variable and going to the cleanup tag. Replace these returns with gotos to avoid memory issues when exiting early due to error conditions. Unfortunately, these error conditions are difficult to reproduce with test cases, which is perhaps one reason why the memory loss was not caught by existing test cases in memory tracking modes. Signed-off-by: Derrick Stolee --- midx-write.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/midx-write.c b/midx-write.c index 85b2d471efc7b5..f2d9a990e66f3b 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1321,13 +1321,15 @@ static int write_midx_internal(struct repository *r, const char *object_dir, incr = mks_tempfile_m(midx_name.buf, 0444); if (!incr) { error(_("unable to create temporary MIDX layer")); - return -1; + result = -1; + goto cleanup; } if (adjust_shared_perm(r, get_tempfile_path(incr))) { error(_("unable to adjust shared permissions for '%s'"), get_tempfile_path(incr)); - return -1; + result = -1; + goto cleanup; } f = hashfd(r->hash_algo, get_tempfile_fd(incr), @@ -1427,18 +1429,22 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (!chainf) { error_errno(_("unable to open multi-pack-index chain file")); - return -1; + result = -1; + goto cleanup; } - if (link_midx_to_chain(ctx.base_midx) < 0) - return -1; + if (link_midx_to_chain(ctx.base_midx) < 0) { + result = -1; + goto cleanup; + } get_split_midx_filename_ext(r->hash_algo, &final_midx_name, object_dir, midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { error_errno(_("unable to rename new multi-pack-index layer")); - return -1; + result = -1; + goto cleanup; } strbuf_release(&final_midx_name); From 6c91e973056899479d51ae218be164004a065f45 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 28 Aug 2025 10:44:29 -0400 Subject: [PATCH 4/5] midx-write: use uint32_t for preferred_pack_idx midx-write.c has the DISABLE_SIGN_COMPARE_WARNINGS macro defined for a few reasons, but the biggest one is the use of a signed preferred_pack_idx member inside the write_midx_context struct. The code currently uses -1 to indicate an unset preferred pack but pack int ids are normally handled as uint32_t. There are also a few loops that search for the preferred pack by name and those iterators will need updates to uint32_t in the next change. For now, replace the use of -1 with a 'NO_PREFERRED_PACK' macro and an equality check. The macro stores the max value of a uint32_t, so we cannot store a preferred pack that appears last in a list of 2^32 total packs, but that's expected to be unreasonable already. This improves the range from 2^31 already. There are some careful things to worry about with initializing the preferred pack in the struct and using that value when searching for a preferred pack that was already incorrect but accidentally working when the index was initialized to zero. Signed-off-by: Derrick Stolee --- midx-write.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/midx-write.c b/midx-write.c index f2d9a990e66f3b..ea1b3a199c11f9 100644 --- a/midx-write.c +++ b/midx-write.c @@ -24,6 +24,7 @@ #define BITMAP_POS_UNKNOWN (~((uint32_t)0)) #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) +#define NO_PREFERRED_PACK (~((uint32_t)0)) extern int midx_checksum_valid(struct multi_pack_index *m); extern void clear_midx_files_ext(const char *object_dir, const char *ext, @@ -104,7 +105,7 @@ struct write_midx_context { unsigned large_offsets_needed:1; uint32_t num_large_offsets; - int preferred_pack_idx; + uint32_t preferred_pack_idx; int incremental; uint32_t num_multi_pack_indexes_before; @@ -260,7 +261,7 @@ static void midx_fanout_sort(struct midx_fanout *fanout) static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, struct multi_pack_index *m, uint32_t cur_fanout, - int preferred_pack) + uint32_t preferred_pack) { uint32_t start = m->num_objects_in_base, end; uint32_t cur_object; @@ -274,7 +275,7 @@ static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, end = m->num_objects_in_base + ntohl(m->chunk_oid_fanout[cur_fanout]); for (cur_object = start; cur_object < end; cur_object++) { - if ((preferred_pack > -1) && + if ((preferred_pack != NO_PREFERRED_PACK) && (preferred_pack == nth_midxed_pack_int_id(m, cur_object))) { /* * Objects from preferred packs are added @@ -364,7 +365,8 @@ static void compute_sorted_entries(struct write_midx_context *ctx, preferred, cur_fanout); } - if (-1 < ctx->preferred_pack_idx && ctx->preferred_pack_idx < start_pack) + if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && + ctx->preferred_pack_idx < start_pack) midx_fanout_add_pack_fanout(&fanout, ctx->info, ctx->preferred_pack_idx, 1, cur_fanout); @@ -1042,7 +1044,9 @@ static int write_midx_internal(struct repository *r, const char *object_dir, struct hashfile *f = NULL; struct lock_file lk; struct tempfile *incr; - struct write_midx_context ctx = { 0 }; + struct write_midx_context ctx = { + .preferred_pack_idx = NO_PREFERRED_PACK, + }; int bitmapped_packs_concat_len = 0; int pack_name_concat_len = 0; int dropped_packs = 0; @@ -1150,7 +1154,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, goto cleanup; /* nothing to do */ if (preferred_pack_name) { - ctx.preferred_pack_idx = -1; + ctx.preferred_pack_idx = NO_PREFERRED_PACK; for (i = 0; i < ctx.nr; i++) { if (!cmp_idx_or_pack_name(preferred_pack_name, @@ -1160,12 +1164,12 @@ static int write_midx_internal(struct repository *r, const char *object_dir, } } - if (ctx.preferred_pack_idx == -1) + if (ctx.preferred_pack_idx == NO_PREFERRED_PACK) warning(_("unknown preferred pack: '%s'"), preferred_pack_name); } else if (ctx.nr && (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { - struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p; + struct packed_git *oldest = ctx.info[0].p; ctx.preferred_pack_idx = 0; if (packs_to_drop && packs_to_drop->nr) @@ -1193,17 +1197,17 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * objects to resolve, so the preferred value doesn't * matter. */ - ctx.preferred_pack_idx = -1; + ctx.preferred_pack_idx = NO_PREFERRED_PACK; } } else { /* * otherwise don't mark any pack as preferred to avoid * interfering with expiration logic below */ - ctx.preferred_pack_idx = -1; + ctx.preferred_pack_idx = NO_PREFERRED_PACK; } - if (ctx.preferred_pack_idx > -1) { + if (ctx.preferred_pack_idx != NO_PREFERRED_PACK) { struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p; if (open_pack_index(preferred)) From abe22e5f0ac326ce5c71ca1d0983127802e29ee5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 28 Aug 2025 10:55:27 -0400 Subject: [PATCH 5/5] midx-write: reenable signed comparison errors Remove the remaining signed comparison warnings in midx-write.c so that they can be enforced as errors in the future. After the previous change, the remaining errors are due to iterator variables named 'i'. The strategy here involves defining the variable within the for loop syntax to make sure we use the appropriate bitness for the loop sentinel. This matters in at least one method where the variable was compared to uint32_t in some loops and size_t in others. While adjusting these loops, there were some where the loop boundary was checking against a uint32_t value _plus one_. These were replaced with non-strict comparisons, but also the value is checked to not be UINT32_MAX. Since the value is the number of incremental multi-pack- indexes, this is not a meaningful restriction. The new die() is about defensive programming more than it being realistically possible. Signed-off-by: Derrick Stolee --- midx-write.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/midx-write.c b/midx-write.c index ea1b3a199c11f9..93f33e5baa03da 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1,5 +1,3 @@ -#define DISABLE_SIGN_COMPARE_WARNINGS - #include "git-compat-util.h" #include "abspath.h" #include "config.h" @@ -845,7 +843,7 @@ static int write_midx_bitmap(struct write_midx_context *ctx, uint32_t commits_nr, unsigned flags) { - int ret, i; + int ret; uint16_t options = 0; struct bitmap_writer writer; struct pack_idx_entry **index; @@ -873,7 +871,7 @@ static int write_midx_bitmap(struct write_midx_context *ctx, * this order). */ ALLOC_ARRAY(index, pdata->nr_objects); - for (i = 0; i < pdata->nr_objects; i++) + for (uint32_t i = 0; i < pdata->nr_objects; i++) index[i] = &pdata->objects[i].idx; bitmap_writer_init(&writer, ctx->repo, pdata, @@ -894,7 +892,7 @@ static int write_midx_bitmap(struct write_midx_context *ctx, * happens between bitmap_writer_build_type_index() and * bitmap_writer_finish(). */ - for (i = 0; i < pdata->nr_objects; i++) + for (uint32_t i = 0; i < pdata->nr_objects; i++) index[ctx->pack_order[i]] = &pdata->objects[i].idx; bitmap_writer_select_commits(&writer, commits, commits_nr); @@ -1040,7 +1038,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, { struct strbuf midx_name = STRBUF_INIT; unsigned char midx_hash[GIT_MAX_RAWSZ]; - uint32_t i, start_pack; + uint32_t start_pack; struct hashfile *f = NULL; struct lock_file lk; struct tempfile *incr; @@ -1156,7 +1154,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, if (preferred_pack_name) { ctx.preferred_pack_idx = NO_PREFERRED_PACK; - for (i = 0; i < ctx.nr; i++) { + for (size_t i = 0; i < ctx.nr; i++) { if (!cmp_idx_or_pack_name(preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; @@ -1181,7 +1179,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * pack-order has all of its objects selected from that pack * (and not another pack containing a duplicate) */ - for (i = 1; i < ctx.nr; i++) { + for (size_t i = 1; i < ctx.nr; i++) { struct packed_git *p = ctx.info[i].p; if (!oldest->num_objects || p->mtime < oldest->mtime) { @@ -1225,7 +1223,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, compute_sorted_entries(&ctx, start_pack); ctx.large_offsets_needed = 0; - for (i = 0; i < ctx.entries_nr; i++) { + for (size_t i = 0; i < ctx.entries_nr; i++) { if (ctx.entries[i].offset > 0x7fffffff) ctx.num_large_offsets++; if (ctx.entries[i].offset > 0xffffffff) @@ -1235,10 +1233,10 @@ static int write_midx_internal(struct repository *r, const char *object_dir, QSORT(ctx.info, ctx.nr, pack_info_compare); if (packs_to_drop && packs_to_drop->nr) { - int drop_index = 0; + size_t drop_index = 0; int missing_drops = 0; - for (i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { + for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { int cmp = strcmp(ctx.info[i].pack_name, packs_to_drop->items[drop_index].string); @@ -1269,7 +1267,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * pack_perm[old_id] = new_id */ ALLOC_ARRAY(ctx.pack_perm, ctx.nr); - for (i = 0; i < ctx.nr; i++) { + for (size_t i = 0; i < ctx.nr; i++) { if (ctx.info[i].expired) { dropped_packs++; ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED; @@ -1278,7 +1276,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, } } - for (i = 0; i < ctx.nr; i++) { + for (size_t i = 0; i < ctx.nr; i++) { if (ctx.info[i].expired) continue; pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1; @@ -1424,6 +1422,9 @@ static int write_midx_internal(struct repository *r, const char *object_dir, * have been freed in the previous if block. */ + if (ctx.num_multi_pack_indexes_before == UINT32_MAX) + die("too many multi-pack-indexes"); + CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1); if (ctx.incremental) { @@ -1456,7 +1457,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, keep_hashes[ctx.num_multi_pack_indexes_before] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); - for (i = 0; i < ctx.num_multi_pack_indexes_before; i++) { + for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m), @@ -1464,7 +1465,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, m = m->base_midx; } - for (i = 0; i < ctx.num_multi_pack_indexes_before + 1; i++) + for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); } else { keep_hashes[ctx.num_multi_pack_indexes_before] = @@ -1482,7 +1483,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, ctx.incremental); cleanup: - for (i = 0; i < ctx.nr; i++) { + for (size_t i = 0; i < ctx.nr; i++) { if (ctx.info[i].p) { close_pack(ctx.info[i].p); free(ctx.info[i].p); @@ -1495,7 +1496,7 @@ static int write_midx_internal(struct repository *r, const char *object_dir, free(ctx.pack_perm); free(ctx.pack_order); if (keep_hashes) { - for (i = 0; i < ctx.num_multi_pack_indexes_before + 1; i++) + for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) free((char *)keep_hashes[i]); free(keep_hashes); }