From 3e0b34cf6dbf0803f5d7aa9617a3c4497bcb5959 Mon Sep 17 00:00:00 2001
From: Kevin McGehee
Date: Wed, 14 Oct 2015 12:03:47 -0700
Subject: [PATCH] Fix master timeout during handshake

This change allows a slave to properly time out a dead master during
the extended asynchronous synchronization state machine. Now, slaves
will record their last interaction with the master and apply the
replication timeout before a response to the PSYNC request is received.
---
 src/replication.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/replication.c b/src/replication.c
index 55a0340fae35..3c642f0b7ec0 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -41,6 +41,7 @@ void replicationDiscardCachedMaster(void);
 void replicationResurrectCachedMaster(int newfd);
 void replicationSendAck(void);
 void putSlaveOnline(redisClient *slave);
+int serverInHandshakeState(int repl_state);
 
 /* --------------------------- Utility functions ---------------------------- */
 
@@ -1190,6 +1191,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
             return sdscatprintf(sdsempty(),"-Reading from master: %s",
                     strerror(errno));
         }
+        server.repl_transfer_lastio = server.unixtime;
         return sdsnew(buf);
     }
     return NULL;
@@ -1619,7 +1621,7 @@ void undoConnectWithMaster(void) {
     int fd = server.repl_transfer_s;
 
     redisAssert(server.repl_state == REDIS_REPL_CONNECTING ||
-                server.repl_state == REDIS_REPL_RECEIVE_PONG);
+                serverInHandshakeState(server.repl_state));
     aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
     close(fd);
     server.repl_transfer_s = -1;
@@ -1638,7 +1640,7 @@ int cancelReplicationHandshake(void) {
     if (server.repl_state == REDIS_REPL_TRANSFER) {
         replicationAbortSyncTransfer();
     } else if (server.repl_state == REDIS_REPL_CONNECTING ||
-             server.repl_state == REDIS_REPL_RECEIVE_PONG)
+             serverInHandshakeState(server.repl_state))
     {
         undoConnectWithMaster();
     } else {
@@ -1810,6 +1812,20 @@ void roleCommand(redisClient *c) {
     }
 }
 
+/* Returns 1 if the given replication state is a handshake state,
+ * 0 otherwise. */
+int serverInHandshakeState(int repl_state) {
+    return repl_state == REDIS_REPL_RECEIVE_PONG ||
+           repl_state == REDIS_REPL_SEND_AUTH ||
+           repl_state == REDIS_REPL_RECEIVE_AUTH ||
+           repl_state == REDIS_REPL_SEND_PORT ||
+           repl_state == REDIS_REPL_RECEIVE_PORT ||
+           repl_state == REDIS_REPL_SEND_CAPA ||
+           repl_state == REDIS_REPL_RECEIVE_CAPA ||
+           repl_state == REDIS_REPL_SEND_PSYNC ||
+           repl_state == REDIS_REPL_RECEIVE_PSYNC;
+}
+
 /* Send a REPLCONF ACK command to the master to inform it about the current
  * processed offset. If we are not connected with a master, the command has
  * no effects. */
@@ -2045,7 +2061,7 @@ void replicationCron(void) {
     /* Non blocking connection timeout? */
     if (server.masterhost &&
         (server.repl_state == REDIS_REPL_CONNECTING ||
-         server.repl_state == REDIS_REPL_RECEIVE_PONG) &&
+         serverInHandshakeState(server.repl_state)) &&
          (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
     {
         redisLog(REDIS_WARNING,"Timeout connecting to the MASTER...");