Skip to content

Commit

Permalink
emergentlyRestartReplicationOnTopologyInstance fixes
Browse files Browse the repository at this point in the history
- restart replication fully (both SQL and IO threads)
- fix concurrency/iteration bug
- slightly more logging

Yesterday I observed a situation where `stop slave io_thread; start slave io_thread;` was not enough to make a replica identify that its master is broken. By stopping replication completely, I hope to mitigate such behavior.
  • Loading branch information
Shlomi Noach committed Nov 7, 2019
1 parent e05d957 commit 2893aec
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
9 changes: 5 additions & 4 deletions go/inst/instance_topology_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,12 @@ func SetSemiSyncReplica(instanceKey *InstanceKey, enableReplica bool) (*Instance

}

func RestartIOThread(instanceKey *InstanceKey) error {
for _, cmd := range []string{`stop slave io_thread`, `start slave io_thread`} {
func RestartReplicationQuick(instanceKey *InstanceKey) error {
for _, cmd := range []string{`stop slave sql_thread`, `stop slave io_thread`, `start slave io_thread`, `start slave sql_thread`} {
if _, err := ExecInstance(instanceKey, cmd); err != nil {
return log.Errorf("%+v: RestartIOThread: '%q' failed: %+v", *instanceKey, cmd, err)
return log.Errorf("%+v: RestartReplicationQuick: '%q' failed: %+v", *instanceKey, cmd, err)
} else {
log.Infof("%s on %+v as part of RestartReplicationQuick", cmd, *instanceKey)
}
}
return nil
Expand Down Expand Up @@ -400,7 +402,6 @@ func StopSlave(instanceKey *InstanceKey) (*Instance, error) {
}
}
if err != nil {

return instance, log.Errore(err)
}
instance, err = ReadTopologyInstance(instanceKey)
Expand Down
5 changes: 3 additions & 2 deletions go/logic/topology_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -1412,7 +1412,7 @@ func emergentlyRestartReplicationOnTopologyInstance(instanceKey *inst.InstanceKe
return
}
go inst.ExecuteOnTopology(func() {
inst.RestartIOThread(instanceKey)
inst.RestartReplicationQuick(instanceKey)
inst.AuditOperation("emergently-restart-replication-topology-instance", instanceKey, string(analysisCode))
})
}
Expand Down Expand Up @@ -1444,7 +1444,8 @@ func emergentlyRestartReplicationOnTopologyInstanceReplicas(instanceKey *inst.In
return
}
for _, replica := range replicas {
go emergentlyRestartReplicationOnTopologyInstance(&replica.Key, analysisCode)
replicaKey := &replica.Key
go emergentlyRestartReplicationOnTopologyInstance(replicaKey, analysisCode)
}
}

Expand Down

0 comments on commit 2893aec

Please sign in to comment.