Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Garbage collection on startup #39

Merged
merged 3 commits into from
Jun 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/ZoneTree.UnitTests/FixedSizeKeyAndValueTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,42 @@ public void IntNullableIntDeleteTest()
Assert.That(data.ContainsKey(3), Is.True);
}

/// <summary>
/// Verifies that reopening a tree with single segment garbage collection enabled
/// drops the deleted record from the mutable segment, and that repeating the
/// reload keeps producing the same result.
/// </summary>
[Test]
public void IntStringGarbageCollectionTest()
{
    const string treeDirectory = "data/IntStringGarbageCollectionTest";
    if (Directory.Exists(treeDirectory))
    {
        Directory.Delete(treeDirectory, true);
    }

    // First session: insert three records and delete one of them.
    // The deleted record is still counted in the mutable segment length here.
    using (var tree = new ZoneTreeFactory<int, string>()
        .SetDataDirectory(treeDirectory)
        .OpenOrCreate())
    {
        tree.TryAtomicAdd(1, "1");
        tree.TryAtomicAdd(2, "2");
        tree.TryAtomicAdd(3, "3");
        tree.TryDelete(2);
        Assert.That(tree.ContainsKey(1), Is.True);
        Assert.That(tree.ContainsKey(2), Is.False);
        Assert.That(tree.ContainsKey(3), Is.True);
        Assert.That(tree.Maintenance.MutableSegment.Length, Is.EqualTo(3));
    }

    // Following sessions: reopen the same directory with garbage collection
    // enabled; only the two live records are expected to remain in the segment.
    for (var attempt = 0; attempt < 3; ++attempt)
    {
        using (var tree = new ZoneTreeFactory<int, string>()
            .Configure(options => options.EnableSingleSegmentGarbageCollection = true)
            .SetDataDirectory(treeDirectory)
            .Open())
        {
            Assert.That(tree.ContainsKey(1), Is.True);
            Assert.That(tree.ContainsKey(2), Is.False);
            Assert.That(tree.ContainsKey(3), Is.True);
            Assert.That(tree.Maintenance.MutableSegment.Length, Is.EqualTo(2));
        }
    }
}

[TestCase(true)]
[TestCase(false)]
public void StringIntTreeTest(bool useSparseArray)
Expand Down
29 changes: 26 additions & 3 deletions src/ZoneTree/Core/ZoneTreeLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using Tenray.ZoneTree.Segments.InMemory;
using Tenray.ZoneTree.Segments.MultiPart;
using Tenray.ZoneTree.Segments.NullDisk;
using Tenray.ZoneTree.WAL;

namespace Tenray.ZoneTree.Core;

Expand Down Expand Up @@ -145,11 +146,17 @@ void ValidateSegmentOrder()
}
}

void LoadMutableSegment(long maximumOpIndex)
IWriteAheadLog<TKey, TValue> LoadMutableSegment(long maximumOpIndex,
bool collectGarbage)
{
var loader = new MutableSegmentLoader<TKey, TValue>(Options);
MutableSegment = loader
.LoadMutableSegment(ZoneTreeMeta.MutableSegment, maximumOpIndex);
.LoadMutableSegment(
ZoneTreeMeta.MutableSegment,
maximumOpIndex,
collectGarbage,
out var wal);
return wal;
}

long LoadReadOnlySegments()
Expand Down Expand Up @@ -225,13 +232,29 @@ void SetMaximumId()
maximumId = bs.Count > 0 ? bs.Max() : 0;
SetMaximumSegmentId(maximumId);
}

public ZoneTree<TKey, TValue> LoadZoneTree()
{
LoadZoneTreeMeta();
LoadZoneTreeMetaWAL();
SetMaximumId();
var maximumOpIndex = LoadReadOnlySegments();
LoadMutableSegment(maximumOpIndex);
bool collectGarbage = Options.EnableSingleSegmentGarbageCollection && !ZoneTreeMeta.HasDiskSegment && ReadOnlySegments.Count == 0;
var mutableSegmentWal = LoadMutableSegment(maximumOpIndex, collectGarbage);
if (collectGarbage)
{
var len = MutableSegment.Length;
var keys = new TKey[len];
var values = new TValue[len];
var iterator = MutableSegment.GetSeekableIterator();
var i = 0;
while (iterator.Next())
{
keys[i] = iterator.CurrentKey;
values[i++] = iterator.CurrentValue;
}
mutableSegmentWal.ReplaceWriteAheadLog(keys, values, true);
}
LoadDiskSegment();
LoadBottomSegments();
var zoneTree = new ZoneTree<TKey, TValue>(Options, ZoneTreeMeta,
Expand Down
4 changes: 3 additions & 1 deletion src/ZoneTree/Core/ZoneTreeMeta.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public sealed class ZoneTreeMeta
public int DiskSegmentMaxItemCount { get; set; } = 20_000_000;

public WriteAheadLogOptions WriteAheadLogOptions { get; set; }

public DiskSegmentOptions DiskSegmentOptions { get; set; }

public long MutableSegment { get; set; }
Expand All @@ -31,4 +31,6 @@ public sealed class ZoneTreeMeta
public long DiskSegment { get; set; }

public IReadOnlyList<long> BottomSegments { get; set; }

/// <summary>
/// True when the tree has any on-disk data below the in-memory segments:
/// either a disk segment id is recorded (non-zero) or at least one bottom
/// segment is listed. The loader consults this flag to decide whether soft
/// deleted records of the mutable segment can be hard deleted, so it must be
/// true as soon as either kind of segment exists; requiring both would let
/// the loader drop tombstones that still shadow records on disk.
/// A null <see cref="BottomSegments"/> list counts as "no bottom segments".
/// </summary>
public bool HasDiskSegment => DiskSegment != 0 || BottomSegments?.Count > 0;
}
4 changes: 2 additions & 2 deletions src/ZoneTree/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
<Authors>Ahmed Yasin Koculu</Authors>
<PackageId>ZoneTree</PackageId>
<Title>ZoneTree</Title>
<ProductVersion>1.6.4.0</ProductVersion>
<Version>1.6.4.0</Version>
<ProductVersion>1.6.5.0</ProductVersion>
<Version>1.6.5.0</Version>
<Authors>Ahmed Yasin Koculu</Authors>
<AssemblyTitle>ZoneTree</AssemblyTitle>
<Description>ZoneTree is a persistent, high-performance, transactional, ACID-compliant ordered key-value database for NET. It can operate in memory or on local/cloud storage.</Description>
Expand Down
7 changes: 7 additions & 0 deletions src/ZoneTree/Options/ZoneTreeOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,13 @@ public void Validate()
/// </summary>
public DeleteValueConfigurationValidation DeleteValueConfigurationValidation { get; set; }

/// <summary>
/// If the ZoneTree contains only a single segment (which is the mutable segment),
/// there is an opportunity to perform a hard delete of the soft deleted values.
/// If enabled, the tree performs garbage collection on load if it is applicable:
/// the loader applies it only when the tree has no disk segment and no read-only
/// segments, and then replaces the mutable segment's write ahead log with the
/// records that remain after the collection.
/// The option is off unless it is explicitly set to true.
/// </summary>
public bool EnableSingleSegmentGarbageCollection { get; set; }

/// <summary>
/// Creates default delete delegates for nullable types.
/// </summary>
Expand Down
37 changes: 35 additions & 2 deletions src/ZoneTree/Segments/InMemory/MutableSegment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ public sealed class MutableSegment<TKey, TValue> : IMutableSegment<TKey, TValue>
ZoneTreeOptions<TKey, TValue> options,
IReadOnlyList<TKey> keys,
IReadOnlyList<TValue> values,
long nextOpIndex)
long nextOpIndex,
bool collectGarbage)
{
SegmentId = segmentId;
WriteAheadLog = wal;
Expand All @@ -85,7 +86,16 @@ public sealed class MutableSegment<TKey, TValue> : IMutableSegment<TKey, TValue>

MarkValueDeleted = options.MarkValueDeleted;
MutableSegmentMaxItemCount = options.MutableSegmentMaxItemCount;
LoadLogEntries(keys, values);
if (collectGarbage)
{
// If there isn't any disk segment and readonly segment,
// it is safe to hard delete the soft deleted values.
LoadLogEntriesWithGarbageCollection(keys, values);
}
else
{
LoadLogEntries(keys, values);
}
}

void LoadLogEntries(IReadOnlyList<TKey> keys, IReadOnlyList<TValue> values)
Expand All @@ -101,6 +111,29 @@ void LoadLogEntries(IReadOnlyList<TKey> keys, IReadOnlyList<TValue> values)
}
}

/// <summary>
/// Fills the segment's tree from the given log entries while discarding garbage:
/// for every key only the entry closest to the end of the lists is considered,
/// and the key is skipped entirely when that entry holds a deleted value.
/// </summary>
/// <param name="keys">Keys of the log entries (presumably in write order, oldest first).</param>
/// <param name="values">Values of the log entries, index-aligned with <paramref name="keys"/>.</param>
void LoadLogEntriesWithGarbageCollection(
    IReadOnlyList<TKey> keys,
    IReadOnlyList<TValue> values)
{
    // Tracks the keys that were already decided by a later entry.
    var seenKeys = new BTree<TKey, byte>(
        Options.Comparer,
        Collections.BTree.Lock.BTreeLockMode.NoLock);
    var isDeleted = Options.IsValueDeleted;

    // Scan from the last entry towards the first, so the first hit for a key
    // is the one nearest to the end of the log.
    var index = keys.Count;
    while (--index >= 0)
    {
        var key = keys[index];
        if (seenKeys.ContainsKey(in key))
        {
            // An entry further down the log already settled this key.
            continue;
        }

        var value = values[index];
        seenKeys.Upsert(in key, 1, out _);
        if (!isDeleted(in value))
        {
            BTree.Upsert(in key, in value, out _);
        }
    }
}

public bool ContainsKey(in TKey key)
{
return BTree.ContainsKey(key);
Expand Down
20 changes: 15 additions & 5 deletions src/ZoneTree/Segments/InMemory/MutableSegmentLoader.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using Tenray.ZoneTree.Exceptions;
using Tenray.ZoneTree.Core;
using Tenray.ZoneTree.Options;
using Tenray.ZoneTree.WAL;

namespace Tenray.ZoneTree.Segments.InMemory;

Expand All @@ -14,9 +15,13 @@ public sealed class MutableSegmentLoader<TKey, TValue>
Options = options;
}

public IMutableSegment<TKey, TValue> LoadMutableSegment(long segmentId, long maximumOpIndex)
public IMutableSegment<TKey, TValue> LoadMutableSegment(
long segmentId,
long maximumOpIndex,
bool collectGarbage,
out IWriteAheadLog<TKey, TValue> wal)
{
var wal = Options.WriteAheadLogProvider
wal = Options.WriteAheadLogProvider
.GetOrCreateWAL(
segmentId,
ZoneTree<TKey, TValue>.SegmentWalCategory,
Expand All @@ -40,8 +45,13 @@ public sealed class MutableSegmentLoader<TKey, TValue>
}
}
maximumOpIndex = Math.Max(result.MaximumOpIndex, maximumOpIndex);
return new MutableSegment<TKey, TValue>
(segmentId, wal, Options, result.Keys,
result.Values, maximumOpIndex + 1);
return new MutableSegment<TKey, TValue>(
segmentId,
wal,
Options,
result.Keys,
result.Values,
maximumOpIndex + 1,
collectGarbage);
}
}
26 changes: 18 additions & 8 deletions src/ZoneTree/ZoneTreeFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,20 @@ void FillValueSerializer()
new ByteArraySerializer() as ISerializer<TValue>;
}

/// <summary>
/// Initializes the sparse arrays of the disk segment and of every bottom segment
/// of the given tree, using <c>InitialSparseArrayLength</c> as the length.
/// Nothing is done when that length is 1 or less.
/// </summary>
/// <param name="zoneTree">The tree whose segments get their sparse arrays initialized.</param>
void LoadInitialSparseArrays(ZoneTree<TKey, TValue> zoneTree)
{
    if (InitialSparseArrayLength > 1)
    {
        // The disk segment is handled on its own task so that it overlaps
        // with the parallel pass over the bottom segments.
        var diskSegmentTask = Task.Run(
            () => zoneTree.Maintenance.DiskSegment.InitSparseArray(InitialSparseArrayLength));

        Parallel.ForEach(
            zoneTree.Maintenance.BottomSegments,
            (bottomSegment) =>
            {
                bottomSegment.InitSparseArray(InitialSparseArrayLength);
            });

        // Block until the disk segment is done before returning to the caller.
        diskSegmentTask.Wait();
    }
}

/// <summary>
/// Opens or creates a ZoneTree.
/// </summary>
Expand All @@ -487,13 +501,7 @@ void FillValueSerializer()
if (loader.ZoneTreeMetaExists)
{
var zoneTree = loader.LoadZoneTree();
var t1 = Task.Run(() =>
zoneTree.Maintenance.DiskSegment.InitSparseArray(InitialSparseArrayLength));
Parallel.ForEach(zoneTree.Maintenance.BottomSegments, (bs) =>
{
bs.InitSparseArray(InitialSparseArrayLength);
});
t1.Wait();
LoadInitialSparseArrays(zoneTree);
return zoneTree;
}
return new ZoneTree<TKey, TValue>(Options);
Expand Down Expand Up @@ -526,7 +534,9 @@ void FillValueSerializer()
var loader = new ZoneTreeLoader<TKey, TValue>(Options);
if (!loader.ZoneTreeMetaExists)
throw new DatabaseNotFoundException();
return loader.LoadZoneTree();
var zoneTree = loader.LoadZoneTree();
LoadInitialSparseArrays(zoneTree);
return zoneTree;
}

/// <summary>
Expand Down