diff --git a/src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs b/src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs
index a49063a..72454cb 100644
--- a/src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs
+++ b/src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs
@@ -104,6 +104,42 @@ public void IntNullableIntDeleteTest()
         Assert.That(data.ContainsKey(3), Is.True);
     }
 
+    [Test]
+    public void IntStringGarbageCollectionTest()
+    {
+        var dataPath = "data/IntStringGarbageCollectionTest";
+        if (Directory.Exists(dataPath))
+            Directory.Delete(dataPath, true);
+
+        // Populate a fresh tree; the soft deleted key 2 still counts towards the length (3).
+        {
+            using var data = new ZoneTreeFactory()
+                .SetDataDirectory(dataPath)
+                .OpenOrCreate();
+            data.TryAtomicAdd(1, "1");
+            data.TryAtomicAdd(2, "2");
+            data.TryAtomicAdd(3, "3");
+            data.TryDelete(2);
+            Assert.That(data.ContainsKey(1), Is.True);
+            Assert.That(data.ContainsKey(2), Is.False);
+            Assert.That(data.ContainsKey(3), Is.True);
+            Assert.That(data.Maintenance.MutableSegment.Length, Is.EqualTo(3));
+        }
+
+        // Reload several times with garbage collection enabled; every load must report length 2.
+        for (var i = 0; i < 3; ++i)
+        {
+            using var data = new ZoneTreeFactory()
+                .Configure(options => options.EnableSingleSegmentGarbageCollection = true)
+                .SetDataDirectory(dataPath)
+                .Open();
+            Assert.That(data.ContainsKey(1), Is.True);
+            Assert.That(data.ContainsKey(2), Is.False);
+            Assert.That(data.ContainsKey(3), Is.True);
+            Assert.That(data.Maintenance.MutableSegment.Length, Is.EqualTo(2));
+        }
+    }
+
     [TestCase(true)]
     [TestCase(false)]
     public void StringIntTreeTest(bool useSparseArray)
diff --git a/src/ZoneTree/Core/ZoneTreeLoader.cs b/src/ZoneTree/Core/ZoneTreeLoader.cs
index bf208eb..f85fbbf 100644
--- a/src/ZoneTree/Core/ZoneTreeLoader.cs
+++ b/src/ZoneTree/Core/ZoneTreeLoader.cs
@@ -6,6 +6,7 @@
 using Tenray.ZoneTree.Segments.InMemory;
 using Tenray.ZoneTree.Segments.MultiPart;
 using Tenray.ZoneTree.Segments.NullDisk;
+using Tenray.ZoneTree.WAL;
 
 namespace Tenray.ZoneTree.Core;
 
@@ -145,11 +146,23 @@ void ValidateSegmentOrder()
         }
     }
 
-    void LoadMutableSegment(long maximumOpIndex)
+    /// <summary>
+    /// Loads the mutable segment and exposes the write ahead log it was loaded with.
+    /// </summary>
+    /// <param name="maximumOpIndex">Op index returned by LoadReadOnlySegments.</param>
+    /// <param name="collectGarbage">Forwarded to the segment loader.</param>
+    /// <returns>The write ahead log handed out by the segment loader.</returns>
+    IWriteAheadLog LoadMutableSegment(long maximumOpIndex,
+        bool collectGarbage)
     {
         var loader = new MutableSegmentLoader(Options);
         MutableSegment = loader
-            .LoadMutableSegment(ZoneTreeMeta.MutableSegment, maximumOpIndex);
+            .LoadMutableSegment(
+                ZoneTreeMeta.MutableSegment,
+                maximumOpIndex,
+                collectGarbage,
+                out var wal);
+        return wal;
     }
 
     long LoadReadOnlySegments()
@@ -225,13 +238,36 @@ void SetMaximumId()
         maximumId = bs.Count > 0 ? bs.Max() : 0;
         SetMaximumSegmentId(maximumId);
     }
+
     public ZoneTree LoadZoneTree()
     {
         LoadZoneTreeMeta();
         LoadZoneTreeMetaWAL();
         SetMaximumId();
         var maximumOpIndex = LoadReadOnlySegments();
-        LoadMutableSegment(maximumOpIndex);
+        // Hard deleting soft deleted values is only attempted when it is enabled and
+        // the tree has no disk segment, no bottom segment and no read-only segment.
+        bool collectGarbage =
+            Options.EnableSingleSegmentGarbageCollection &&
+            !ZoneTreeMeta.HasDiskSegment &&
+            ReadOnlySegments.Count == 0;
+        var mutableSegmentWal = LoadMutableSegment(maximumOpIndex, collectGarbage);
+        if (collectGarbage)
+        {
+            // Copy the records of the loaded segment into arrays and
+            // replace the write ahead log with exactly those records.
+            var len = MutableSegment.Length;
+            var keys = new TKey[len];
+            var values = new TValue[len];
+            var iterator = MutableSegment.GetSeekableIterator();
+            var i = 0;
+            while (iterator.Next())
+            {
+                keys[i] = iterator.CurrentKey;
+                values[i++] = iterator.CurrentValue;
+            }
+            mutableSegmentWal.ReplaceWriteAheadLog(keys, values, true);
+        }
         LoadDiskSegment();
         LoadBottomSegments();
         var zoneTree = new ZoneTree(Options, ZoneTreeMeta,
diff --git a/src/ZoneTree/Core/ZoneTreeMeta.cs b/src/ZoneTree/Core/ZoneTreeMeta.cs
index 5da88cf..d79e6e5 100644
--- a/src/ZoneTree/Core/ZoneTreeMeta.cs
+++ b/src/ZoneTree/Core/ZoneTreeMeta.cs
@@ -21,7 +21,7 @@ public sealed class ZoneTreeMeta
     public int DiskSegmentMaxItemCount { get; set; } = 20_000_000;
 
     public WriteAheadLogOptions WriteAheadLogOptions { get; set; }
-    
+
     public DiskSegmentOptions DiskSegmentOptions { get; set; }
 
     public long MutableSegment { get; set; }
@@ -31,4 +31,11 @@ public sealed class ZoneTreeMeta
     public long DiskSegment { get; set; }
 
     public IReadOnlyList BottomSegments { get; set; }
+
+    /// <summary>
+    /// True if the tree has a disk segment or at least one bottom segment.
+    /// Either one is enough: a tree with a disk segment but no bottom
+    /// segments must not be treated as a single segment tree.
+    /// </summary>
+    public bool HasDiskSegment => DiskSegment != 0 || BottomSegments?.Count > 0;
 }
diff --git a/src/ZoneTree/Directory.Build.props b/src/ZoneTree/Directory.Build.props
index f2b68dd..8520e53 100644
--- a/src/ZoneTree/Directory.Build.props
+++ b/src/ZoneTree/Directory.Build.props
@@ -5,8 +5,8 @@
     Ahmed Yasin Koculu
     ZoneTree
     ZoneTree
-    1.6.4.0
-    1.6.4.0
+    1.6.5.0
+    1.6.5.0
     Ahmed Yasin Koculu
     ZoneTree
     ZoneTree is a persistent, high-performance, transactional, ACID-compliant ordered key-value database for NET. It can operate in memory or on local/cloud storage.
diff --git a/src/ZoneTree/Options/ZoneTreeOptions.cs b/src/ZoneTree/Options/ZoneTreeOptions.cs
index 3c01ad2..a3ae461 100644
--- a/src/ZoneTree/Options/ZoneTreeOptions.cs
+++ b/src/ZoneTree/Options/ZoneTreeOptions.cs
@@ -256,6 +256,13 @@ public void Validate()
     ///
     public DeleteValueConfigurationValidation DeleteValueConfigurationValidation { get; set; }
 
+    /// <summary>
+    /// If the ZoneTree contains only a single segment (which is the mutable segment),
+    /// there is an opportunity to perform a hard delete of the soft deleted values.
+    /// If enabled, the tree performs garbage collection on load if it is applicable.
+    /// </summary>
+    public bool EnableSingleSegmentGarbageCollection { get; set; }
+
     ///
     /// Creates default delete delegates for nullable types.
     ///
diff --git a/src/ZoneTree/Segments/InMemory/MutableSegment.cs b/src/ZoneTree/Segments/InMemory/MutableSegment.cs
index 1818181..0cc24ab 100644
--- a/src/ZoneTree/Segments/InMemory/MutableSegment.cs
+++ b/src/ZoneTree/Segments/InMemory/MutableSegment.cs
@@ -68,7 +68,8 @@ public sealed class MutableSegment : IMutableSegment
         ZoneTreeOptions options,
         IReadOnlyList keys,
         IReadOnlyList values,
-        long nextOpIndex)
+        long nextOpIndex,
+        bool collectGarbage)
     {
         SegmentId = segmentId;
         WriteAheadLog = wal;
@@ -85,7 +86,16 @@ public sealed class MutableSegment : IMutableSegment
         MarkValueDeleted = options.MarkValueDeleted;
         MutableSegmentMaxItemCount = options.MutableSegmentMaxItemCount;
 
-        LoadLogEntries(keys, values);
+        if (collectGarbage)
+        {
+            // If there isn't any disk segment and readonly segment,
+            // it is safe to hard delete the soft deleted values.
+            LoadLogEntriesWithGarbageCollection(keys, values);
+        }
+        else
+        {
+            LoadLogEntries(keys, values);
+        }
     }
 
     void LoadLogEntries(IReadOnlyList keys, IReadOnlyList values)
@@ -101,6 +111,36 @@ void LoadLogEntries(IReadOnlyList keys, IReadOnlyList values)
         }
     }
 
+    /// <summary>
+    /// Loads the log entries while leaving out soft deleted records.
+    /// The entries are visited from the last one to the first one, so only
+    /// the last entry of each key is considered and earlier ones are skipped.
+    /// </summary>
+    void LoadLogEntriesWithGarbageCollection(
+        IReadOnlyList keys,
+        IReadOnlyList values)
+    {
+        var distinctKeys =
+            new BTree(Options.Comparer, Collections.BTree.Lock.BTreeLockMode.NoLock);
+
+        var isValueDeleted = Options.IsValueDeleted;
+        for (var i = keys.Count - 1; i >= 0; --i)
+        {
+            var key = keys[i];
+            // A later entry of this key has already been handled.
+            if (distinctKeys.ContainsKey(in key))
+                continue;
+            var value = values[i];
+            distinctKeys.Upsert(in key, 1, out _);
+            // The last entry of the key is a soft delete: leave the key out.
+            if (isValueDeleted(in value))
+            {
+                continue;
+            }
+            BTree.Upsert(in key, in value, out var _);
+        }
+    }
+
     public bool ContainsKey(in TKey key)
     {
         return BTree.ContainsKey(key);
diff --git a/src/ZoneTree/Segments/InMemory/MutableSegmentLoader.cs b/src/ZoneTree/Segments/InMemory/MutableSegmentLoader.cs
index ed45855..d01d7f1 100644
--- a/src/ZoneTree/Segments/InMemory/MutableSegmentLoader.cs
+++ b/src/ZoneTree/Segments/InMemory/MutableSegmentLoader.cs
@@ -1,6 +1,7 @@
 using Tenray.ZoneTree.Exceptions;
 using Tenray.ZoneTree.Core;
 using Tenray.ZoneTree.Options;
+using Tenray.ZoneTree.WAL;
 
 namespace Tenray.ZoneTree.Segments.InMemory;
 
@@ -14,9 +15,18 @@ public sealed class MutableSegmentLoader
         Options = options;
     }
 
-    public IMutableSegment LoadMutableSegment(long segmentId, long maximumOpIndex)
+    /// <summary>
+    /// Loads the mutable segment with the given id from its write ahead log.
+    /// </summary>
+    /// <param name="collectGarbage">Passed on to the created mutable segment.</param>
+    /// <param name="wal">Receives the write ahead log obtained for the segment.</param>
+    public IMutableSegment LoadMutableSegment(
+        long segmentId,
+        long maximumOpIndex,
+        bool collectGarbage,
+        out IWriteAheadLog wal)
     {
-        var wal = Options.WriteAheadLogProvider
+        wal = Options.WriteAheadLogProvider
             .GetOrCreateWAL(
                 segmentId,
                 ZoneTree.SegmentWalCategory,
@@ -40,8 +50,13 @@ public sealed class MutableSegmentLoader
             }
         }
         maximumOpIndex = Math.Max(result.MaximumOpIndex, maximumOpIndex);
-        return new MutableSegment
-            (segmentId, wal, Options, result.Keys,
-            result.Values, maximumOpIndex + 1);
+        return new MutableSegment(
+            segmentId,
+            wal,
+            Options,
+            result.Keys,
+            result.Values,
+            maximumOpIndex + 1,
+            collectGarbage);
     }
 }
diff --git a/src/ZoneTree/ZoneTreeFactory.cs b/src/ZoneTree/ZoneTreeFactory.cs
index 25b3dc0..f1151b2 100644
--- a/src/ZoneTree/ZoneTreeFactory.cs
+++ b/src/ZoneTree/ZoneTreeFactory.cs
@@ -475,6 +475,24 @@ void FillValueSerializer()
             new ByteArraySerializer() as ISerializer;
     }
 
+    /// <summary>
+    /// Initializes the sparse arrays of the disk segment and of all bottom segments.
+    /// Does nothing when InitialSparseArrayLength is 1 or less.
+    /// </summary>
+    void LoadInitialSparseArrays(ZoneTree zoneTree)
+    {
+        if (InitialSparseArrayLength <= 1)
+            return;
+
+        var t1 = Task.Run(() =>
+            zoneTree.Maintenance.DiskSegment.InitSparseArray(InitialSparseArrayLength));
+        Parallel.ForEach(zoneTree.Maintenance.BottomSegments, (bs) =>
+        {
+            bs.InitSparseArray(InitialSparseArrayLength);
+        });
+        t1.Wait();
+    }
+
     ///
     /// Opens or creates a ZoneTree.
     ///
@@ -487,13 +505,7 @@ void FillValueSerializer()
         if (loader.ZoneTreeMetaExists)
         {
             var zoneTree = loader.LoadZoneTree();
-            var t1 = Task.Run(() =>
-                zoneTree.Maintenance.DiskSegment.InitSparseArray(InitialSparseArrayLength));
-            Parallel.ForEach(zoneTree.Maintenance.BottomSegments, (bs) =>
-            {
-                bs.InitSparseArray(InitialSparseArrayLength);
-            });
-            t1.Wait();
+            LoadInitialSparseArrays(zoneTree);
             return zoneTree;
         }
         return new ZoneTree(Options);
@@ -526,7 +538,9 @@ void FillValueSerializer()
         var loader = new ZoneTreeLoader(Options);
         if (!loader.ZoneTreeMetaExists)
             throw new DatabaseNotFoundException();
-        return loader.LoadZoneTree();
+        var zoneTree = loader.LoadZoneTree();
+        LoadInitialSparseArrays(zoneTree);
+        return zoneTree;
     }
 
     ///