Skip to content

Commit 42f5da5

Browse files
committed
Always disconnect chained replicas when the replica performs a PSYNC with the master, to avoid a replication offset mismatch between the master and the chained replicas.
1 parent 90fda5c commit 42f5da5

File tree

2 files changed

+60
-3
lines changed

2 files changed

+60
-3
lines changed

src/replication.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2023,12 +2023,19 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
20232023
* new one. */
20242024
memcpy(server.replid,new,sizeof(server.replid));
20252025
memcpy(server.cached_master->replid,new,sizeof(server.replid));
2026-
2027-
/* Disconnect all the sub-slaves: they need to be notified. */
2028-
disconnectSlaves();
20292026
}
20302027
}
20312028

2029+
/* Disconnect all the sub-replicas: they need to be notified always because
2030+
* in case the master has last replicated some non-meaningful commands
2031+
* (e.g. PINGs), the primary replica will request the PSYNC offset for the
2032+
* last known meaningful command. This means the master will again replicate
2033+
* the non-meaningful commands. If the sub-replicas still remain connected,
2034+
* they will receive those commands a second time and increment the master
2035+
* replication offset to be higher than the master's offset forever.
2036+
*/
2037+
disconnectSlaves();
2038+
20322039
/* Setup the replication to continue. */
20332040
sdsfree(reply);
20342041
replicationResurrectCachedMaster(conn);

tests/integration/psync2-pingoff.tcl

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,53 @@ start_server {} {
6060
}
6161
}
6262
}}
63+
64+
65+
start_server {tags {"psync2"}} {
66+
start_server {} {
67+
start_server {} {
68+
69+
for {set j 0} {$j < 3} {incr j} {
70+
set R($j) [srv [expr 0-$j] client]
71+
set R_host($j) [srv [expr 0-$j] host]
72+
set R_port($j) [srv [expr 0-$j] port]
73+
$R($j) CONFIG SET repl-ping-replica-period 1
74+
}
75+
76+
test "Chained replicas disconnect when replica re-connect with the same master" {
77+
# Add a second replica as a chained replica of the current replica
78+
$R(1) replicaof $R_host(0) $R_port(0)
79+
$R(2) replicaof $R_host(1) $R_port(1)
80+
wait_for_condition 50 1000 {
81+
[status $R(2) master_link_status] == "up"
82+
} else {
83+
fail "Chained replica not replicating from its master"
84+
}
85+
86+
# Do a write on the master, and wait for 2 seconds for the master to
87+
# send some PINGs to its replica
88+
$R(0) INCR counter2
89+
after 2000
90+
set sync_partial_master [status $R(0) sync_partial_ok]
91+
set sync_partial_replica [status $R(1) sync_partial_ok]
92+
$R(0) CONFIG SET repl-ping-replica-period 100
93+
94+
# Disconnect the master's direct replica
95+
$R(0) client kill type replica
96+
wait_for_condition 50 1000 {
97+
[status $R(1) master_link_status] == "up" &&
98+
[status $R(2) master_link_status] == "up" &&
99+
[status $R(0) sync_partial_ok] == $sync_partial_master + 1 &&
100+
[status $R(1) sync_partial_ok] == $sync_partial_replica + 1
101+
} else {
102+
fail "Disconnected replica failed to PSYNC with master"
103+
}
104+
105+
# Verify that the meaningful and real offsets of the replica and of its
106+
# chained replica match the master's offsets
107+
assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
108+
assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
109+
assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset]
110+
assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset]
111+
}
112+
}}}

0 commit comments

Comments
 (0)