From 20e2baf206e4842f2a90044c8e399485ec71b928 Mon Sep 17 00:00:00 2001 From: David First Date: Fri, 29 May 2026 08:20:31 -0400 Subject: [PATCH 1/4] fix(ci): recover from a stale remote lane by recreating it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When --keep-lane finds the lane on the remote, it calls switchToLane and trusts that as the way to land on the existing lane. If the stored lane is stale relative to the current PR (the PR has since removed/renamed a ModelComponent the lane still references, surfaced as 'unable to merge lane …, the component … was not found'), the switch fails — switchToLane logs and swallows the error, the post-switch guard then throws, and every subsequent 'bit ci pr --keep-lane' on that branch fails the same way until the lane is manually deleted on the remote. Replace the throw with a recovery path: when we observe that we did not land on the requested lane after switching, delete the stale remote lane and create a fresh one (same name) — the same shape the create-from-scratch path takes. The lane's prior history is lost on this one run, but the next run preserves history again, and we're no longer wedged. Surfaced by PR #10397 in CI; reachable on any long-running PR whose component shape changed after a lane was first created. --- scopes/git/ci/ci.main.runtime.ts | 67 +++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/scopes/git/ci/ci.main.runtime.ts b/scopes/git/ci/ci.main.runtime.ts index 41b38b1626d5..577cba739bab 100644 --- a/scopes/git/ci/ci.main.runtime.ts +++ b/scopes/git/ci/ci.main.runtime.ts @@ -611,34 +611,55 @@ export class CiMain { // switchToLane fetches the latest lane head from remote. this.logger.console(chalk.blue(`Lane ${laneId.toString()} exists on remote, reusing it`)); await this.switchToLane(laneId.toString()); - // Verify the switch actually landed us on the lane before doing any lane work. - // switchToLane logs-and-swallows switch failures, so without this guard a failed switch - // would let syncConfigFromMain and snap run against the wrong lane. + // switchToLane logs-and-swallows switch failures, so we have to look at the resulting + // current-lane state to know whether the switch actually landed. const switchedLane = await this.lanes.getCurrentLane(); - if (switchedLane?.name !== laneId.name) { - throw new Error( - `Expected to be on lane ${laneId.name} after switching, but current lane is ${switchedLane?.name ?? 'main'}` - ); - } - // Sync config-only changes from main onto the lane, so config that was tagged into objects - // on main since the lane forked (e.g. `bit deps set` / `bit env set` from another PR, not - // visible via the workspace's git checkout) is reflected on the lane. Source files are - // git's job — see syncConfigFromMain. - // - // BUT only when the PR branch is actually up to date with the default branch. If the PR is - // behind (hasn't pulled main's latest), its git checkout still reflects the older fork - // point, so pulling main's newer config onto the lane would desync the lane from the - // source. The author merges the default branch into their PR in git; the next `bit ci pr` - // then propagates it here. - if (await this.isBranchBehindDefaultBranch()) { + if (switchedLane?.name === laneId.name) { + // Sync config-only changes from main onto the lane, so config that was tagged into + // objects on main since the lane forked (e.g. `bit deps set` / `bit env set` from + // another PR, not visible via the workspace's git checkout) is reflected on the lane. + // Source files are git's job — see syncConfigFromMain. + // + // BUT only when the PR branch is actually up to date with the default branch. If the PR + // is behind (hasn't pulled main's latest), its git checkout still reflects the older + // fork point, so pulling main's newer config onto the lane would desync the lane from + // the source. The author merges the default branch into their PR in git; the next + // `bit ci pr` then propagates it here. + if (await this.isBranchBehindDefaultBranch()) { + this.logger.console( + chalk.yellow( + `PR branch is behind the default branch — skipping config sync from main. ` + + `Merge or rebase the default branch into your PR to pick up main's latest config.` + ) + ); + } else { + await this.syncConfigFromMain(laneId); + } + } else { + // Switch failed even though the remote lane exists — typically the stored lane is stale + // relative to the current PR: it references a ModelComponent that the PR has since + // removed/renamed (`unable to merge lane …, the component … was not found`), or it was + // produced by an earlier shape of the PR that no longer matches. The red `Failed + // switching to …` line above (logged by `switchToLane`) carries the underlying error. + // + // We can't reuse a lane we can't switch to, so fall back to the create path: delete the + // stale remote lane and start a fresh one. The lane's prior history is lost, but that's + // strictly better than failing every subsequent `bit ci pr` run on this branch — once + // the fresh lane is in place, the next run will preserve history again. this.logger.console( chalk.yellow( - `PR branch is behind the default branch — skipping config sync from main. ` + - `Merge or rebase the default branch into your PR to pick up main's latest config.` + `Stale remote lane ${laneId.toString()} — switching failed. ` + + `Deleting it and creating a fresh lane to recover.` ) ); - } else { - await this.syncConfigFromMain(laneId); + await this.lanes.removeLanes([laneId.toString()], { remote: true, force: true }).catch((e) => { + throw new Error(`Failed to delete stale remote lane ${laneId.toString()}: ${e.toString()}`); + }); + const createLaneResult = await this.lanes.createLane(laneId.name, { + scope: laneId.scope, + forkLaneNewScope: true, + }); + this.logger.console(chalk.blue(`Recreated lane ${laneId.toString()} (hash: ${createLaneResult.hash})`)); } } else { this.logger.console(chalk.blue(`Creating lane ${laneId.toString()}`)); From c3dcd87ca10f44b894af6aefeb99f390c7903057 Mon Sep 17 00:00:00 2001 From: David First Date: Fri, 29 May 2026 09:09:04 -0400 Subject: [PATCH 2/4] fix(ci): drop the local lane object before recreating, not just the remote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit switchToLane fetches the remote lane and persists it into the local scope's lane index (importLaneObject → legacyScope.lanes.saveLane) BEFORE the underlying merge step throws. removeLanes({remote: true, force: true}) only deletes the remote — it returns early in remove-lanes.ts:8-19 and never touches local state. createLane's existing-lane guard then sees the locally-persisted stale copy and throws 'lane … already exists', and the recovery crashes on the very next line — the exact failure mode the recovery was added to fix. Trash the local lane object via legacyScope.objects.moveObjectsToTrash between the remote delete and createLane, mirroring the pattern used in rebaseOntoRemoteLane. Also defensive nullish on .toString() so a non-Error rejection doesn't compound the failure with a TypeError. --- scopes/git/ci/ci.main.runtime.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scopes/git/ci/ci.main.runtime.ts b/scopes/git/ci/ci.main.runtime.ts index 577cba739bab..4fc64472db3d 100644 --- a/scopes/git/ci/ci.main.runtime.ts +++ b/scopes/git/ci/ci.main.runtime.ts @@ -653,8 +653,18 @@ export class CiMain { ) ); await this.lanes.removeLanes([laneId.toString()], { remote: true, force: true }).catch((e) => { - throw new Error(`Failed to delete stale remote lane ${laneId.toString()}: ${e.toString()}`); + throw new Error(`Failed to delete stale remote lane ${laneId.toString()}: ${e?.toString() ?? e}`); }); + // switchToLane fetched the remote lane and persisted it into the local scope's lane + // index (via `importLaneObject` → `legacyScope.lanes.saveLane`) BEFORE the underlying + // merge failed. Without dropping that local copy here, the upcoming `createLane` would + // hit the "lane … already exists" guard in create-lane.ts and the recovery would crash + // on the very next line. Same trash-the-local-object pattern as `rebaseOntoRemoteLane`. + const legacyScope = this.workspace.scope.legacyScope; + const localLane = await legacyScope.loadLane(laneId); + if (localLane) { + await legacyScope.objects.moveObjectsToTrash([localLane.hash()]); + } const createLaneResult = await this.lanes.createLane(laneId.name, { scope: laneId.scope, forkLaneNewScope: true, From 2f9e2b4f00b47937c62846ee5355662145f425f8 Mon Sep 17 00:00:00 2001 From: David First Date: Fri, 29 May 2026 09:14:21 -0400 Subject: [PATCH 3/4] fix(ci): gate destructive stale-lane recovery on the specific error marker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-up fixes to the recovery path: 1. Only enter the destructive 'delete-remote + recreate' path when the switch failure actually carries the stale-lane marker ('unable to merge lane'). For any other failure class (transient network blip during fetchLaneComponents, auth/permission error, lane locked by Cloud UI, remote 5xx) we now rethrow with the original error, instead of silently wiping the lane and its Cloud UI history every time CI is flaky. Mechanism: switchToLane now returns the caught Error (or undefined on success); the caller inspects it. No existing caller of switchToLane uses the return value, so the signature change is local. 2. Switch to main before createLane. createLane populates new lanes from consumer.getCurrentLaneObject() regardless of forkLaneNewScope (which only suppresses the cross-scope guard) — without this reset, the 'fresh' lane silently inherits the bitmap's current-lane components when a developer runs bit ci pr from a non-default lane. 3. Compare scope AND name when checking whether the switch landed. A same-named lane in a different scope can no longer masquerade as a successful switch. 4. Tolerate 'was not found' from removeLanes — a concurrent recovery run on the same PR can race and delete the lane first; the desired post-condition (lane gone from remote) is already met, so we proceed instead of failing. --- scopes/git/ci/ci.main.runtime.ts | 72 ++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/scopes/git/ci/ci.main.runtime.ts b/scopes/git/ci/ci.main.runtime.ts index 4fc64472db3d..790f5f3a6ca0 100644 --- a/scopes/git/ci/ci.main.runtime.ts +++ b/scopes/git/ci/ci.main.runtime.ts @@ -329,7 +329,13 @@ export class CiMain { return { status }; } - private async switchToLane(laneName: string, options: SwitchLaneOptions = {}) { + /** + * Returns the caught Error on failure, or undefined on success (including the "already checked + * out" no-op case). Callers that need to react to a specific failure mode (e.g. stale lane) can + * inspect the returned error; existing callers ignore it and rely on a follow-up + * `getCurrentLane()` check. + */ + private async switchToLane(laneName: string, options: SwitchLaneOptions = {}): Promise { this.logger.console(chalk.blue(`Switching to ${laneName}`)); try { await this.lanes.switchLanes(laneName, { @@ -339,12 +345,14 @@ export class CiMain { ...options, }); } catch (e: any) { - if (e.toString().includes('already checked out')) { + if (e?.toString().includes('already checked out')) { this.logger.console(chalk.yellow(`Lane ${laneName} already checked out, skipping checkout`)); - return true; + return undefined; } - this.logger.console(chalk.red(`Failed switching to ${laneName}: ${e.toString()}`)); + this.logger.console(chalk.red(`Failed switching to ${laneName}: ${e?.toString() ?? e}`)); + return e; } + return undefined; } /** @@ -610,11 +618,13 @@ export class CiMain { // lane-history feature on Bit Cloud all survive across subsequent commits to the same PR. // switchToLane fetches the latest lane head from remote. this.logger.console(chalk.blue(`Lane ${laneId.toString()} exists on remote, reusing it`)); - await this.switchToLane(laneId.toString()); - // switchToLane logs-and-swallows switch failures, so we have to look at the resulting - // current-lane state to know whether the switch actually landed. + const switchErr = await this.switchToLane(laneId.toString()); + // switchToLane returns the caught error (undefined on success). Combine with a + // current-lane-state probe — comparing BOTH name AND scope, so a same-named lane in a + // different scope can't masquerade as a successful switch. const switchedLane = await this.lanes.getCurrentLane(); - if (switchedLane?.name === laneId.name) { + const landedOnLane = switchedLane?.name === laneId.name && switchedLane?.scope === laneId.scope; + if (landedOnLane) { // Sync config-only changes from main onto the lane, so config that was tagged into // objects on main since the lane forked (e.g. `bit deps set` / `bit env set` from // another PR, not visible via the workspace's git checkout) is reflected on the lane. @@ -636,16 +646,22 @@ export class CiMain { await this.syncConfigFromMain(laneId); } } else { - // Switch failed even though the remote lane exists — typically the stored lane is stale - // relative to the current PR: it references a ModelComponent that the PR has since - // removed/renamed (`unable to merge lane …, the component … was not found`), or it was - // produced by an earlier shape of the PR that no longer matches. The red `Failed - // switching to …` line above (logged by `switchToLane`) carries the underlying error. - // - // We can't reuse a lane we can't switch to, so fall back to the create path: delete the - // stale remote lane and start a fresh one. The lane's prior history is lost, but that's - // strictly better than failing every subsequent `bit ci pr` run on this branch — once - // the fresh lane is in place, the next run will preserve history again. + // Switch failed even though the remote lane exists. The destructive recovery below + // (delete the remote lane + recreate fresh) is safe only when the failure is the + // specific "stale lane" pattern — the lane references a ModelComponent the PR has + // since removed/renamed (`unable to merge lane …, the component … was not found`). + // For any other failure (transient network blip during fetch, auth error, lane locked + // by Cloud UI, etc.) destroying lane history would be the wrong response, so we + // rethrow and let the caller report the real cause. + const errMsg = switchErr?.toString() ?? ''; + const isStaleLane = errMsg.includes('unable to merge lane'); + if (!isStaleLane) { + throw new Error( + `Failed to switch to remote lane ${laneId.toString()}: ${errMsg || '(no error captured)'}. ` + + `Refusing destructive recovery for this failure class — the error doesn't match the ` + + `stale-lane marker, so deleting the lane could destroy real history. Investigate or retry.` + ); + } this.logger.console( chalk.yellow( `Stale remote lane ${laneId.toString()} — switching failed. ` + @@ -653,18 +669,32 @@ export class CiMain { ) ); await this.lanes.removeLanes([laneId.toString()], { remote: true, force: true }).catch((e) => { - throw new Error(`Failed to delete stale remote lane ${laneId.toString()}: ${e?.toString() ?? e}`); + const msg = e?.toString() ?? ''; + // Tolerate the race where another concurrent recovery deleted the lane first — the + // desired post-condition (lane gone from remote) is already met. + if (msg.includes('was not found') || msg.includes('not found')) { + this.logger.console(chalk.blue(`Remote lane ${laneId.toString()} was already gone — proceeding`)); + return; + } + throw new Error(`Failed to delete stale remote lane ${laneId.toString()}: ${msg || e}`); }); // switchToLane fetched the remote lane and persisted it into the local scope's lane // index (via `importLaneObject` → `legacyScope.lanes.saveLane`) BEFORE the underlying // merge failed. Without dropping that local copy here, the upcoming `createLane` would - // hit the "lane … already exists" guard in create-lane.ts and the recovery would crash - // on the very next line. Same trash-the-local-object pattern as `rebaseOntoRemoteLane`. + // hit the "lane … already exists" guard in create-lane.ts. Same trash-the-local-object + // pattern as `rebaseOntoRemoteLane`. const legacyScope = this.workspace.scope.legacyScope; const localLane = await legacyScope.loadLane(laneId); if (localLane) { await legacyScope.objects.moveObjectsToTrash([localLane.hash()]); } + // Reset the workspace's current-lane pointer to main before createLane, so the new lane + // is forked from main with an empty component list. `createLane` populates new lanes + // from `consumer.getCurrentLaneObject()` regardless of `forkLaneNewScope` (which only + // suppresses the cross-scope guard) — if `originalLane` is non-default (a developer + // running `bit ci pr` from a lane), without this reset the "fresh" lane would silently + // inherit `originalLane`'s components. + await this.switchToLane('main'); const createLaneResult = await this.lanes.createLane(laneId.name, { scope: laneId.scope, forkLaneNewScope: true, From 223793898e7793c669a0fd1bfcb231e5e5d65b87 Mon Sep 17 00:00:00 2001 From: David First Date: Fri, 29 May 2026 09:42:38 -0400 Subject: [PATCH 4/4] fix(ci): shrink stale-lane recovery race + check switchToLane('main') return MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups to Copilot review on the stale-lane recovery path: 1. Re-check the remote lane's hash immediately before the destructive removeLanes. The central-hub delete API is name-based (no compare-and-swap), so two CI jobs racing the same recovery could otherwise have job B delete job A's freshly-recreated lane. Re-fetching here shrinks the TOCTOU window from seconds to milliseconds; if A's recreate landed first the hash differs, we skip the delete entirely, and the downstream export hits the lane-hash mismatch and lands in exportWithAdoptOnConflict (which rebases our snaps onto the winner's lane — no destroyed history). 2. Check switchToLane('main') return value (and follow-up getCurrentLane) before createLane. The reset to main is load-bearing — createLane copies from consumer.getCurrentLaneObject() regardless of forkLaneNewScope, so a silent failure of the reset would silently fork the 'fresh' lane from originalLane. Now we throw with a clear message instead of producing a wrong-source lane. --- scopes/git/ci/ci.main.runtime.ts | 54 +++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/scopes/git/ci/ci.main.runtime.ts b/scopes/git/ci/ci.main.runtime.ts index 790f5f3a6ca0..f517de6b4c2f 100644 --- a/scopes/git/ci/ci.main.runtime.ts +++ b/scopes/git/ci/ci.main.runtime.ts @@ -668,16 +668,37 @@ export class CiMain { `Deleting it and creating a fresh lane to recover.` ) ); - await this.lanes.removeLanes([laneId.toString()], { remote: true, force: true }).catch((e) => { - const msg = e?.toString() ?? ''; - // Tolerate the race where another concurrent recovery deleted the lane first — the - // desired post-condition (lane gone from remote) is already met. - if (msg.includes('was not found') || msg.includes('not found')) { - this.logger.console(chalk.blue(`Remote lane ${laneId.toString()} was already gone — proceeding`)); - return; - } - throw new Error(`Failed to delete stale remote lane ${laneId.toString()}: ${msg || e}`); - }); + // Re-check the remote lane's hash immediately before deleting. The central-hub delete + // API is name-based — there's no compare-and-swap — so two CI jobs racing the same + // recovery could otherwise have job B delete job A's freshly-recreated lane. By + // re-fetching here we shrink the TOCTOU window to milliseconds: if A's recreate landed + // before our re-fetch, the hash changed and we skip the delete entirely. The downstream + // export then hits the lane-hash mismatch and lands in `exportWithAdoptOnConflict`, + // which rebases our snaps onto the winner's lane — no destroyed history. + const staleHash = existingLanes[0]?.hash; + const recheck = await this.lanes.getLanes({ remote: laneId.scope, name: laneId.name }).catch(() => []); + const currentRemoteHash = recheck[0]?.hash; + const remoteChanged = staleHash && currentRemoteHash && currentRemoteHash !== staleHash; + if (remoteChanged) { + this.logger.console( + chalk.blue( + `Remote lane ${laneId.toString()} changed since we first checked (hash ` + + `${staleHash.slice(0, 9)} → ${currentRemoteHash.slice(0, 9)}) — another concurrent ` + + `recovery already recreated it. Skipping the delete; export will adopt-on-conflict.` + ) + ); + } else { + await this.lanes.removeLanes([laneId.toString()], { remote: true, force: true }).catch((e) => { + const msg = e?.toString() ?? ''; + // Tolerate the race where another concurrent recovery deleted the lane first — the + // desired post-condition (lane gone from remote) is already met. + if (msg.includes('was not found') || msg.includes('not found')) { + this.logger.console(chalk.blue(`Remote lane ${laneId.toString()} was already gone — proceeding`)); + return; + } + throw new Error(`Failed to delete stale remote lane ${laneId.toString()}: ${msg || e}`); + }); + } // switchToLane fetched the remote lane and persisted it into the local scope's lane // index (via `importLaneObject` → `legacyScope.lanes.saveLane`) BEFORE the underlying // merge failed. Without dropping that local copy here, the upcoming `createLane` would @@ -693,8 +714,17 @@ export class CiMain { // from `consumer.getCurrentLaneObject()` regardless of `forkLaneNewScope` (which only // suppresses the cross-scope guard) — if `originalLane` is non-default (a developer // running `bit ci pr` from a lane), without this reset the "fresh" lane would silently - // inherit `originalLane`'s components. - await this.switchToLane('main'); + // inherit `originalLane`'s components. Check the return value: a silent failure here + // would defeat the whole point of the reset. + const resetErr = await this.switchToLane('main'); + const afterReset = await this.lanes.getCurrentLane(); + if (resetErr || afterReset) { + throw new Error( + `Failed to reset to main before recreating ${laneId.toString()}: ` + + `${resetErr?.toString() ?? `(still on lane "${afterReset?.name}")`}. ` + + `Aborting to avoid silently forking the recreated lane from the wrong source.` + ); + } const createLaneResult = await this.lanes.createLane(laneId.name, { scope: laneId.scope, forkLaneNewScope: true,