From a333b1a19d0a8ed95449fe5efc5df866d4414a61 Mon Sep 17 00:00:00 2001 From: ziggie Date: Wed, 19 Nov 2025 22:08:31 +0100 Subject: [PATCH 1/2] routing: allow misson control manager to startup despite errors We now allow the mission control manager to skip over deserializable errors. We cannot repair this these results but we just skip over it so we can startup properly. When fetchAll() encounters entries that fail to deserialize, in addition to skipping them, now also: - Delete the corrupted entries from the database - Remove them from the in-memory keysMap and keys tracking structures This prevents corrupted entries from: - Being counted toward maxRecords, which would cause valid entries to be pruned prematurely - Persisting in the database indefinitely - Causing inaccurate entry counts in startup logs --- routing/missioncontrol_store.go | 57 ++++++++++++++- routing/missioncontrol_store_test.go | 105 +++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 3 deletions(-) diff --git a/routing/missioncontrol_store.go b/routing/missioncontrol_store.go index 7398ca0dcca..c960f5c6e34 100644 --- a/routing/missioncontrol_store.go +++ b/routing/missioncontrol_store.go @@ -133,30 +133,81 @@ func (b *missionControlStore) clear() error { } // fetchAll returns all results currently stored in the database. +// It also removes any corrupted entries that fail to deserialize from both +// the database and the in-memory tracking structures. func (b *missionControlStore) fetchAll() ([]*paymentResult, error) { var results []*paymentResult + var corruptedKeys [][]byte - err := b.db.view(func(resultBucket kvdb.RBucket) error { + err := b.db.update(func(resultBucket kvdb.RwBucket) error { results = make([]*paymentResult, 0) + corruptedKeys = make([][]byte, 0) - return resultBucket.ForEach(func(k, v []byte) error { + err := resultBucket.ForEach(func(k, v []byte) error { result, err := deserializeResult(k, v) + + // In case of an error, track the key for removal. if err != nil { - return err + log.Warnf("Failed to deserialize mission "+ + "control entry (key=%x): %v", k, err) + + // Make a copy of the key since ForEach reuses + // the slice. + keyCopy := make([]byte, len(k)) + copy(keyCopy, k) + corruptedKeys = append(corruptedKeys, keyCopy) + + return nil } results = append(results, result) return nil }) + if err != nil { + return err + } + + // Delete corrupted entries from the database. + for _, key := range corruptedKeys { + if err := resultBucket.Delete(key); err != nil { + return fmt.Errorf("failed to delete corrupted "+ + "entry: %w", err) + } + } + return nil }, func() { results = nil + corruptedKeys = nil }) if err != nil { return nil, err } + // Remove corrupted keys from in-memory tracking. + for _, key := range corruptedKeys { + keyStr := string(key) + delete(b.keysMap, keyStr) + + // Remove from the keys list. + for e := b.keys.Front(); e != nil; e = e.Next() { + keyVal, ok := e.Value.(string) + if !ok { + continue + } + if keyVal == keyStr { + b.keys.Remove(e) + break + } + } + } + + if len(corruptedKeys) > 0 { + log.Infof("Removed %d corrupted mission control entries", + len(corruptedKeys)) + } + return results, nil } diff --git a/routing/missioncontrol_store_test.go b/routing/missioncontrol_store_test.go index b020fcbb465..889dca07135 100644 --- a/routing/missioncontrol_store_test.go +++ b/routing/missioncontrol_store_test.go @@ -332,3 +332,108 @@ func BenchmarkMissionControlStoreFlushing(b *testing.B) { }) } } + +// TestMissionControlStoreDeletesCorruptedEntries tests that fetchAll() skips +// entries that fail to deserialize, deletes them from the database, and +// removes them from the in-memory tracking structures. +func TestMissionControlStoreDeletesCorruptedEntries(t *testing.T) { + h := newMCStoreTestHarness(t, testMaxRecords, time.Second) + store := h.store + + failureSourceIdx := 1 + + // Create two valid results. + result1 := newPaymentResult( + 1, mcStoreTestRoute, testTime, testTime, + fn.Some(newPaymentFailure( + &failureSourceIdx, + lnwire.NewFailIncorrectDetails(100, 1000), + )), + ) + + result2 := newPaymentResult( + 2, mcStoreTestRoute, testTime.Add(time.Hour), + testTime.Add(time.Hour), + fn.Some(newPaymentFailure( + &failureSourceIdx, + lnwire.NewFailIncorrectDetails(100, 1000), + )), + ) + + // Store both results. + store.AddResult(result1) + store.AddResult(result2) + require.NoError(t, store.storeResults()) + + // Insert a corrupted entry into the database. + var corruptedKey [8 + 8 + 33]byte + byteOrder.PutUint64(corruptedKey[:], uint64(testTime.Add( + 30*time.Minute).UnixNano()), + ) + byteOrder.PutUint64(corruptedKey[8:], 99) // Unique ID. + copy(corruptedKey[16:], result1.route.Val.sourcePubKey.Val[:]) + + err := store.db.update(func(bucket kvdb.RwBucket) error { + // Insert corrupted/invalid TLV data that will fail to + // deserialize. + corruptedValue := []byte{0xFF, 0xFF, 0xFF, 0xFF} + + return bucket.Put(corruptedKey[:], corruptedValue) + }, func() {}) + require.NoError(t, err) + + // Add the corrupted key to in-memory tracking to simulate it being + // loaded at startup (newMissionControlStore populates keysMap from + // all DB keys). + corruptedKeyStr := string(corruptedKey[:]) + store.keysMap[corruptedKeyStr] = struct{}{} + store.keys.PushBack(corruptedKeyStr) + + // Verify the corrupted key is in the in-memory tracking. + _, exists := store.keysMap[corruptedKeyStr] + require.True(t, exists, "corrupted key should be in keysMap") + + // Verify we have 3 entries in the database before fetchAll. + var dbEntryCountBefore int + err = store.db.view(func(bucket kvdb.RBucket) error { + return bucket.ForEach(func(k, v []byte) error { + dbEntryCountBefore++ + return nil + }) + }, func() { + dbEntryCountBefore = 0 + }) + require.NoError(t, err) + require.Equal(t, 3, dbEntryCountBefore, "should have 3 entries "+ + "in the database before cleanup") + + // Now fetch all results. The corrupted entry should be skipped, + // deleted from the DB, and removed from in-memory tracking. + results, err := store.fetchAll() + require.NoError(t, err, "fetchAll should not return an error "+ + "even when encountering corrupted entries") + require.Len(t, results, 2, "should skip the corrupted entry and "+ + "return only valid results") + + // Verify we still have the correct results. + require.Equal(t, result1, results[0]) + require.Equal(t, result2, results[1]) + + // Verify the corrupted entry was removed from in-memory tracking. + _, exists = store.keysMap[corruptedKeyStr] + require.False(t, exists, "corrupted key should not exist in keysMap") + + // Verify the corrupted entry was deleted from the database. + var dbEntryCountAfter int + err = store.db.view(func(bucket kvdb.RBucket) error { + return bucket.ForEach(func(k, v []byte) error { + dbEntryCountAfter++ + return nil + }) + }, func() { + dbEntryCountAfter = 0 + }) + require.NoError(t, err) + require.Equal(t, 2, dbEntryCountAfter, "corrupted entry should be "+ + "deleted from the database") +} From 798ae45e5fba7ced14b90aca6721509c850d8136 Mon Sep 17 00:00:00 2001 From: ziggie Date: Sat, 22 Nov 2025 02:13:08 +0100 Subject: [PATCH 2/2] docs: add release-notes for LND 20.1 --- docs/release-notes/release-notes-0.20.1.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/release-notes/release-notes-0.20.1.md b/docs/release-notes/release-notes-0.20.1.md index 1963db61b45..83e13a3bf42 100644 --- a/docs/release-notes/release-notes-0.20.1.md +++ b/docs/release-notes/release-notes-0.20.1.md @@ -24,6 +24,11 @@ * Fix bug where channels with both [policies disabled at startup could never be used for routing](https://github.com/lightningnetwork/lnd/pull/10378) +* [Fix a startup issue in LND when ecounntering a + deserialization issue](https://github.com/lightningnetwork/lnd/pull/10383) + in the mission control store. Now we skip over potential errors and also + delete them from the store. + # New Features ## Functional Enhancements