Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
5901c4f
Implement graceful shutdown for Garnet server
yuseok-kim-edushare Nov 26, 2025
9ea9339
Update hosting/Windows/Garnet.worker/Program.cs
yuseok-kim-edushare Nov 26, 2025
7731df6
Update libs/host/GarnetServer.cs
yuseok-kim-edushare Nov 26, 2025
597ec57
Update main/GarnetServer/Program.cs
yuseok-kim-edushare Nov 26, 2025
9e168f4
Update main/GarnetServer/Program.cs
yuseok-kim-edushare Nov 26, 2025
67a7f2e
Update main/GarnetServer/Program.cs
yuseok-kim-edushare Nov 26, 2025
5b25407
Update libs/host/GarnetServer.cs
yuseok-kim-edushare Nov 26, 2025
476d629
🐛 Resolve Race Condition risk in "StopListening" impl at GarnetServer…
yuseok-kim-edushare Nov 26, 2025
9bf52de
✅ add test for gracefulshutdown about main/garnetserver
yuseok-kim-edushare Nov 26, 2025
11d115b
🐛 Fix risk of shutdown handler remaining
yuseok-kim-edushare Nov 26, 2025
3b3df07
✏️ fix by dotnet format
yuseok-kim-edushare Nov 26, 2025
2489995
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Dec 3, 2025
9d5017a
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Dec 6, 2025
5eb025c
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Dec 13, 2025
ff37e9b
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Dec 22, 2025
c542789
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Jan 7, 2026
efd4c75
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Jan 14, 2026
0b113a7
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Jan 19, 2026
19b1a13
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Jan 25, 2026
d77bd6b
✅🔀 Fix Test with Allure related Requirements (reflect #1457)
yuseok-kim-edushare Jan 25, 2026
f14fb2c
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Jan 25, 2026
c8da690
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Jan 27, 2026
2e9a53b
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Jan 29, 2026
30fc80d
Merge branch 'main' into feat/graceful_shutdown
yuseok-kim-edushare Feb 8, 2026
3177001
⏪ Revert : Windows Service shutdown timeout custom
yuseok-kim-edushare Feb 8, 2026
920c328
Use long for active connection count
yuseok-kim-edushare Feb 8, 2026
8ea0c24
minor perf fix in connection counting
yuseok-kim-edushare Feb 8, 2026
bf05d23
Use linked CancellationTokenSource for shutdown wait
yuseok-kim-edushare Feb 8, 2026
01a2547
update log levels infomation -> debug for perfomance inhancing
yuseok-kim-edushare Feb 8, 2026
65b1391
minor bug risk of cancelled canclationToken provide into finally
yuseok-kim-edushare Feb 8, 2026
5145094
🧪 temporal test code save (test for gracefull shutown in single insta…
yuseok-kim-edushare Feb 8, 2026
cf9c997
Fix Program.CS main method refine
yuseok-kim-edushare Feb 8, 2026
514456c
Update libs/host/GarnetServer.cs
yuseok-kim-edushare Feb 8, 2026
db633e3
Update test/Garnet.test/GarnetServerTcpTests.cs
yuseok-kim-edushare Feb 8, 2026
de4e462
Update main/GarnetServer/Program.cs
yuseok-kim-edushare Feb 8, 2026
74f1c0a
Apply suggestion from @Copilot
yuseok-kim-edushare Feb 8, 2026
7bb2ab2
Fix about copilot's concern about race conditions
yuseok-kim-edushare Feb 8, 2026
7cb1e9c
fix issue claimed by copilot
yuseok-kim-edushare Feb 8, 2026
2530260
🔥 remove duplicated test
yuseok-kim-edushare Feb 8, 2026
784f630
Fix Test code running flow
yuseok-kim-edushare Feb 8, 2026
bc6580d
Fix test codes to reflect copilot's suggestions
yuseok-kim-edushare Feb 8, 2026
e9b0a3e
✨Add noSave Arg to Server.ShutdownAsync()
yuseok-kim-edushare Feb 16, 2026
aaaf45b
Add shutdown data consistency tests
yuseok-kim-edushare Feb 16, 2026
714c2f6
Rearrange AOF commit and checkpoint during Data saving
yuseok-kim-edushare Feb 16, 2026
08e48a2
Save data only once during single shutdown process
yuseok-kim-edushare Feb 16, 2026
c6fcb19
Remove isListening Flag
yuseok-kim-edushare Feb 16, 2026
4ccad8e
✏️ Fix format
yuseok-kim-edushare Feb 16, 2026
d37ad7c
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Feb 16, 2026
60faf96
✏️ Fix comment about infomational Test
yuseok-kim-edushare Feb 16, 2026
e0616b7
Merge branch 'yuseok-kim/graceful_shutdown' of https://github.com/yus…
yuseok-kim-edushare Feb 16, 2026
36a85b9
Skip graceful save on forced shutdown
yuseok-kim-edushare Feb 16, 2026
d29c716
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Feb 18, 2026
b7d4d25
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Feb 20, 2026
42d7bd2
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Feb 25, 2026
e6be406
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Feb 25, 2026
1a2fff7
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Mar 2, 2026
fccbbd0
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Mar 3, 2026
6c883a2
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Mar 4, 2026
6d97896
Merge branch 'main' into yuseok-kim/graceful_shutdown
yuseok-kim-edushare Mar 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions hosting/Windows/Garnet.worker/Worker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,26 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
/// <param name="cancellationToken">Indicates that the shutdown process should no longer be graceful.</param>
public override async Task StopAsync(CancellationToken cancellationToken)
{
    // Order matters: the server must still be alive while ShutdownAsync runs, so
    // Dispose() and base.StopAsync() are only invoked once, from the finally block.
    try
    {
        if (server != null)
        {
            // If cancellation is already requested, skip data persistence and go straight to disposal.
            bool isForceShutdown = cancellationToken.IsCancellationRequested;

            // Graceful shutdown (listener stop, connection draining, data persistence unless forced).
            await server.ShutdownAsync(timeout: TimeSpan.FromSeconds(5), noSave: isForceShutdown, token: cancellationToken).ConfigureAwait(false);
        }
    }
    catch (OperationCanceledException)
    {
        // Forced shutdown requested while waiting - proceed to cleanup.
    }
    finally
    {
        try
        {
            // Use CancellationToken.None so base cleanup is not skipped when the caller's token is already cancelled.
            await base.StopAsync(CancellationToken.None).ConfigureAwait(false);
        }
        finally
        {
            // Release the server even if the base class cleanup throws.
            Dispose();
        }
    }
}

public override void Dispose()
Expand All @@ -55,6 +73,8 @@ public override void Dispose()
}
server?.Dispose();
_isDisposed = true;
base.Dispose();
GC.SuppressFinalize(this);
}
}
}
196 changes: 196 additions & 0 deletions libs/host/GarnetServer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Garnet.cluster;
using Garnet.common;
using Garnet.networking;
Expand Down Expand Up @@ -450,6 +451,201 @@ public void Start()
Console.WriteLine("* Ready to accept connections");
}

/// <summary>
/// Gracefully shuts the server down: stops the listeners, waits for active connections to drain,
/// and then, unless <paramref name="noSave"/> is set, persists data through FinalizeDataAsync.
/// </summary>
/// <param name="timeout">Maximum time to wait for active connections to drain (30 seconds when null)</param>
/// <param name="noSave">When true, the data persistence step is skipped</param>
/// <param name="token">Cancels the connection-draining wait; persistence runs under its own 15 second timeout</param>
/// <returns>Task that completes once the shutdown sequence has finished</returns>
public async Task ShutdownAsync(TimeSpan? timeout = null, bool noSave = false, CancellationToken token = default)
{
    var drainTimeout = timeout.GetValueOrDefault(TimeSpan.FromSeconds(30));

    try
    {
        // No new connections from this point on.
        StopListening();

        await DrainConnectionsAsync().ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        logger?.LogError(ex, "Error during graceful shutdown");
    }
    finally
    {
        // Persistence is best-effort and is attempted even when draining was cancelled or failed.
        if (noSave)
        {
            logger?.LogInformation("Shutdown with noSave flag - skipping data persistence.");
        }
        else
        {
            await PersistDataAsync().ConfigureAwait(false);
        }
    }

    // Waits for existing connections; a cancelled wait is logged and does not abort the shutdown.
    async Task DrainConnectionsAsync()
    {
        try
        {
            await WaitForActiveConnectionsAsync(drainTimeout, token).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            logger?.LogWarning("Connection draining was cancelled. Proceeding with data finalization...");
        }
    }

    // Runs data finalization under a bounded timeout that is independent of the caller's token.
    async Task PersistDataAsync()
    {
        using var finalizeCts = new CancellationTokenSource(TimeSpan.FromSeconds(15));
        try
        {
            await FinalizeDataAsync(finalizeCts.Token).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            logger?.LogError(ex, "Error during data finalization");
        }
    }
}

/// <summary>
/// Asks every configured server to stop accepting new connections.
/// A failure on one server is logged and does not prevent the remaining servers from being stopped.
/// </summary>
private void StopListening()
{
    if (servers != null)
    {
        logger?.LogDebug("Stopping listeners to prevent new connections...");

        foreach (var listener in servers)
        {
            TryStop(listener);
        }
    }

    // Stops a single listener, converting any failure into a warning log entry.
    void TryStop(IGarnetServer listener)
    {
        try
        {
            listener?.StopListening();
        }
        catch (Exception stopError)
        {
            logger?.LogWarning(stopError, "Error stopping listener");
        }
    }
}

/// <summary>
/// Polls the active connection count until it reaches zero, the timeout elapses, or the caller cancels.
/// </summary>
/// <param name="timeout">Maximum time to keep polling before giving up</param>
/// <param name="token">Caller cancellation token; when it fires, an <see cref="OperationCanceledException"/> is thrown</param>
/// <returns>Task that completes when all connections are closed or the timeout has been reached</returns>
private async Task WaitForActiveConnectionsAsync(TimeSpan timeout, CancellationToken token)
{
    if (servers == null) return;

    // Linked token: fires on either the caller's cancellation or the timeout.
    using var cts = CancellationTokenSource.CreateLinkedTokenSource(token);
    cts.CancelAfter(timeout);

    // Poll quickly at first, then back off; the last entry is reused for all later polls.
    var delays = new[] { 50, 300, 1000 };
    var delayIndex = 0;

    try
    {
        while (!cts.Token.IsCancellationRequested)
        {
            int currentDelay;
            try
            {
                var activeConnections = GetActiveConnectionCount();
                if (activeConnections == 0)
                {
                    logger?.LogInformation("All connections have been closed gracefully.");
                    return;
                }

                logger?.LogInformation("Waiting for {ActiveConnections} active connections to complete...", activeConnections);

                currentDelay = delays[delayIndex];
                if (delayIndex < delays.Length - 1) delayIndex++;
            }
            catch (Exception ex)
            {
                // A failed poll must not end the drain: back off briefly and try again.
                // The loop stays bounded by the timeout and the caller's token.
                logger?.LogWarning(ex, "Error checking active connections");
                currentDelay = 500;
            }

            await Task.Delay(currentDelay, cts.Token).ConfigureAwait(false);
        }
    }
    catch (OperationCanceledException) when (!token.IsCancellationRequested)
    {
        // Only the timeout fired; it is reported below together with the loop-exit path.
    }

    // Reached on timeout or when the loop condition observed cancellation.
    // Caller cancellation is surfaced as an exception, exactly as when Task.Delay throws.
    token.ThrowIfCancellationRequested();

    logger?.LogWarning("Timeout reached after {TimeoutSeconds} seconds. Some connections may still be active.",
        timeout.TotalSeconds);
}

/// <summary>
/// Sums the active connection counts reported by every server that derives from GarnetServerBase.
/// Returns zero when no servers are configured.
/// </summary>
private long GetActiveConnectionCount()
{
    if (servers == null)
    {
        return 0;
    }

    long total = 0;
    foreach (var candidate in servers)
    {
        // Servers that are not GarnetServerBase instances contribute nothing to the total.
        if (candidate is not GarnetServerBase serverBase)
        {
            continue;
        }

        total += serverBase.get_conn_active();
    }

    return total;
}

/// <summary>
/// Persists data at shutdown. When AOF is enabled only an AOF commit is performed;
/// otherwise, when the storage tier is enabled, a foreground checkpoint is taken.
/// With neither option enabled nothing is done. Failures are logged and not rethrown.
/// </summary>
/// <param name="token">Cancellation token forwarded to the commit or checkpoint call</param>
private async Task FinalizeDataAsync(CancellationToken token)
{
    if (opts.EnableAOF)
    {
        await CommitAofAsync().ConfigureAwait(false);
    }
    else if (opts.EnableStorageTier)
    {
        TakeFinalCheckpoint();
    }

    // Commits the AOF and logs the outcome.
    async Task CommitAofAsync()
    {
        logger?.LogDebug("Committing AOF before shutdown...");
        try
        {
            if (await Store.CommitAOFAsync(token).ConfigureAwait(false))
            {
                logger?.LogDebug("AOF committed successfully.");
                return;
            }

            logger?.LogInformation("AOF commit skipped (another commit in progress or replica mode).");
        }
        catch (Exception ex)
        {
            logger?.LogError(ex, "Error committing AOF during shutdown");
        }
    }

    // Takes a non-background checkpoint and logs the outcome.
    void TakeFinalCheckpoint()
    {
        logger?.LogDebug("Taking checkpoint for tiered storage...");
        try
        {
            if (Store.TakeCheckpoint(background: false, token: token))
            {
                logger?.LogDebug("Checkpoint completed successfully.");
                return;
            }

            logger?.LogInformation("Checkpoint skipped (another checkpoint in progress or replica mode).");
        }
        catch (Exception ex)
        {
            logger?.LogError(ex, "Error taking checkpoint during shutdown");
        }
    }
}

/// <summary>
/// Dispose store (including log and checkpoint directory)
/// </summary>
Expand Down
6 changes: 6 additions & 0 deletions libs/server/Servers/GarnetServerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,12 @@ public bool AddSession(WireFormat protocol, ref ISessionProvider provider, INetw
/// <inheritdoc />
public abstract void Start();

/// <inheritdoc />
public virtual void StopListening()
{
    // Intentionally empty: the base class has nothing to stop.
    // Derived servers that accept connections override this to stop accepting them.
}

/// <inheritdoc />
public virtual void Dispose()
{
Expand Down
16 changes: 16 additions & 0 deletions libs/server/Servers/GarnetServerTcp.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,22 @@ public override void Start()
AcceptEventArg_Completed(null, acceptEventArg);
}

/// <inheritdoc />
public override void StopListening()
{
    try
    {
        // Closing the listen socket is what stops new connections from being accepted;
        // any pending AcceptAsync will complete with an error as a result.
        listenSocket.Close();
        logger?.LogDebug("Stopped accepting new connections on {endpoint}", EndPoint);
    }
    catch (Exception closeError)
    {
        // Best-effort: a failure to close is recorded at debug level and not propagated.
        logger?.LogDebug(closeError, "Error closing listen socket on {endpoint}", EndPoint);
    }
}

private void AcceptEventArg_Completed(object sender, SocketAsyncEventArgs e)
{
try
Expand Down
6 changes: 6 additions & 0 deletions libs/server/Servers/IGarnetServer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,11 @@ public interface IGarnetServer : IDisposable
/// Start server
/// </summary>
public void Start();

/// <summary>
/// Stop accepting new connections (used as the first step of a graceful shutdown).
/// Connections that are already established are not closed by this call; they remain
/// active until they complete or are disposed.
/// </summary>
public void StopListening();
}
}
29 changes: 29 additions & 0 deletions libs/server/Servers/StoreApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,35 @@ public bool FlushDB(int dbId = 0, bool unsafeTruncateLog = false)
}
}

/// <summary>
/// Requests a checkpoint from the store wrapper while role changes are blocked.
/// </summary>
/// <param name="background">True if the call may return before the checkpoint has been taken</param>
/// <param name="token">Cancellation token forwarded to the store wrapper</param>
/// <returns>
/// false when role changes could not be blocked or this node is a replica;
/// otherwise the result reported by the store wrapper
/// </returns>
public bool TakeCheckpoint(bool background = false, CancellationToken token = default)
{
    using var roleChangeGuard = PreventRoleChange(out var acquired);

    // The replica check is only evaluated once the guard has been acquired.
    var mayCheckpoint = acquired && !IsReplica;
    return mayCheckpoint && storeWrapper.TakeCheckpoint(background, logger: null, token: token);
}

/// <summary>
/// Whether the server options have the storage tier enabled
/// </summary>
public bool IsStorageTierEnabled
{
    get { return storeWrapper.serverOptions.EnableStorageTier; }
}

/// <summary>
/// Whether the server options have AOF enabled
/// </summary>
public bool IsAOFEnabled
{
    get { return storeWrapper.serverOptions.EnableAOF; }
}

/// <summary>
/// Helper to disable role changes during a using block.
///
Expand Down
13 changes: 12 additions & 1 deletion main/GarnetServer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,18 @@ static void Main(string[] args)
// Start the server
server.Start();

Thread.Sleep(Timeout.Infinite);
using var shutdownEvent = new ManualResetEventSlim(false);

Console.CancelKeyPress += (sender, e) =>
{
e.Cancel = true;
// Graceful shutdown: drain connections, commit AOF, take checkpoint
server.ShutdownAsync(TimeSpan.FromSeconds(5))
.GetAwaiter().GetResult();
shutdownEvent.Set();
};

shutdownEvent.Wait();
}
catch (Exception ex)
{
Expand Down
Loading
Loading