raftstore: fix a bug where snapshot file is not deleted in time

The issue is that if a leader fails to send a snapshot, it doesn't delete the snapshot file which may accumulate and waste disk space. The idle snapshot files will eventually be GCed, but that might take hours. Test Plan: Added a new integration test that would fail without the fix. Signed-off-by: Bisheng Huang <hbisheng@gmail.com>
tikv · May 9, 2024 · 5e9a46d · 5e9a46d
1 parent f7ccc07
commit 5e9a46d
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 2 deletions.
diff --git a/src/server/snap.rs b/src/server/snap.rs
@@ -219,6 +219,9 @@ pub fn send_snap(
 
             #[cfg(feature = "failpoints")]
             {
+                fail::fail_point!("snap_send_error", |_| {
+                    Err(Error::Other(box_err!("snap_send_error")))
+                });
                 let should_delay = (|| {
                     fail::fail_point!("snap_send_timer_delay", |_| { true });
                     false
@@ -247,10 +250,11 @@ pub fn send_snap(
         send_timer.observe_duration();
         drop(deregister);
         drop(client);
+
+        fail_point!("snapshot_delete_after_send");
+        mgr.delete_snapshot(&key, &chunks.snap, true);
         match recv_result {
             Ok(_) => {
-                fail_point!("snapshot_delete_after_send");
-                mgr.delete_snapshot(&key, &chunks.snap, true);
                 let cost = UnixSecs::now().into_inner().saturating_sub(snap_start);
                 let send_duration_sec = timer.saturating_elapsed().as_secs();
                 // it should ignore if the duration of snapshot is less than 1s to decrease the

diff --git a/tests/failpoints/cases/test_snap.rs b/tests/failpoints/cases/test_snap.rs
@@ -1047,3 +1047,36 @@ fn test_send_snapshot_timeout() {
     fail::remove("snap_send_timer_delay");
     fail::remove("snap_send_duration_timeout");
 }
+
+// Test that snapshots that failed to be sent are cleaned up.
+#[test]
+fn test_snapshot_cleanup_on_send_error() {
+    let mut cluster = new_server_cluster(0, 3);
+    let pd_client = cluster.pd_client.clone();
+    pd_client.disable_default_operator();
+
+    let r = cluster.run_conf_change();
+
+    pd_client.must_add_peer(r, new_peer(2, 2));
+    cluster.must_put(b"k1", b"v1");
+
+    // Simulate an error on the gprc stream so that the leader fails to send
+    // the first snapshot to peer 3.
+    fail::cfg("snap_send_error", "return").unwrap();
+    pd_client.must_add_peer(r, new_peer(3, 3));
+
+    // Snapshot files are identified by a snap key which consists of region,
+    // term, and applied index. By performing a new put here, we advance the
+    // applied index on the leader and make it generate a new snapshot file.
+    cluster.must_put(b"k2", b"v2");
+
+    // After removing the failpoint, the new snapshot will be sent to peer 3,
+    // but we want to make sure that the old snapshot gets cleaned up as well.
+    fail::remove("snap_send_error");
+    cluster.must_put(b"k3", b"v3");
+
+    must_get_equal(&cluster.get_engine(3), b"k3", b"v3");
+
+    // Check that all snapshots on the leader have been deleted.
+    must_empty_dir(cluster.get_snap_dir(1));
+}