Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Workaround to bug 83713: GTID, MTR and relay log corruption #807

Merged
merged 12 commits into from
Mar 5, 2019
81 changes: 67 additions & 14 deletions go/inst/instance_topology_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ const (
GTIDHintForce = "GTIDHintForce"
)

// Error1201CouldnotInitializeMasterInfoStructure is the substring searched for
// (via strings.Contains) in an error's text to recognize MySQL error 1201.
// A match causes workaroundBug83713 to run before the failed statement is retried.
const Error1201CouldnotInitializeMasterInfoStructure = "Error 1201:"

const sqlThreadPollDuration = 400 * time.Millisecond

// ExecInstance executes a given query on the given MySQL topology instance
Expand Down Expand Up @@ -592,6 +596,22 @@ func EnableMasterSSL(instanceKey *InstanceKey) (*Instance, error) {
return instance, err
}

// workaroundBug83713 runs a fixed sequence of statements against the given
// instance: reset slave, start the IO thread, stop the IO thread, then reset
// slave once more. Every statement is best-effort: a failure is only logged at
// debug level and the remaining statements are still executed. Nothing is
// reported back to the caller.
// See https://bugs.mysql.com/bug.php?id=83713
func workaroundBug83713(instanceKey *InstanceKey) {
	log.Debugf("workaroundBug83713: %+v", *instanceKey)
	steps := [...]string{
		`reset slave`,
		`start slave IO_THREAD`,
		`stop slave IO_THREAD`,
		`reset slave`,
	}
	for i := 0; i < len(steps); i++ {
		_, err := ExecInstance(instanceKey, steps[i])
		if err == nil {
			continue
		}
		// Deliberately not aborting: later steps may still succeed.
		log.Debugf("workaroundBug83713: error on %s: %+v", steps[i], err)
	}
}

// ChangeMasterTo changes the given instance's master according to given input.
func ChangeMasterTo(instanceKey *InstanceKey, masterKey *InstanceKey, masterBinlogCoordinates *BinlogCoordinates, skipUnresolve bool, gtidHint OperationGTIDHint) (*Instance, error) {
instance, err := ReadTopologyInstance(instanceKey)
Expand Down Expand Up @@ -623,39 +643,67 @@ func ChangeMasterTo(instanceKey *InstanceKey, masterKey *InstanceKey, masterBinl
originalMasterKey := instance.MasterKey
originalExecBinlogCoordinates := instance.ExecBinlogCoordinates

var changeMasterFunc func() error
changedViaGTID := false
if instance.UsingMariaDBGTID && gtidHint != GTIDHintDeny {
// Keep on using GTID
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?",
changeToMasterKey.Hostname, changeToMasterKey.Port)
changeMasterFunc = func() error {
_, err := ExecInstance(instanceKey, "change master to master_host=?, master_port=?",
changeToMasterKey.Hostname, changeToMasterKey.Port)
return err
}
changedViaGTID = true
} else if instance.UsingMariaDBGTID && gtidHint == GTIDHintDeny {
// Make sure to not use GTID
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_log_file=?, master_log_pos=?, master_use_gtid=no",
changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)
changeMasterFunc = func() error {
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_log_file=?, master_log_pos=?, master_use_gtid=no",
changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)
return err
}
} else if instance.IsMariaDB() && gtidHint == GTIDHintForce {
// Is MariaDB; not using GTID, turn into GTID
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_use_gtid=slave_pos",
changeToMasterKey.Hostname, changeToMasterKey.Port)
changeMasterFunc = func() error {
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_use_gtid=slave_pos",
changeToMasterKey.Hostname, changeToMasterKey.Port)
return err
}
changedViaGTID = true
} else if instance.UsingOracleGTID && gtidHint != GTIDHintDeny {
// Is Oracle; already uses GTID; keep using it.
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?",
changeToMasterKey.Hostname, changeToMasterKey.Port)
changeMasterFunc = func() error {
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?",
changeToMasterKey.Hostname, changeToMasterKey.Port)
return err
}
changedViaGTID = true
} else if instance.UsingOracleGTID && gtidHint == GTIDHintDeny {
// Is Oracle; already uses GTID
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_log_file=?, master_log_pos=?, master_auto_position=0",
changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)
changeMasterFunc = func() error {
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_log_file=?, master_log_pos=?, master_auto_position=0",
changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)
return err
}
} else if instance.SupportsOracleGTID && gtidHint == GTIDHintForce {
// Is Oracle; not using GTID right now; turn into GTID
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_auto_position=1",
changeToMasterKey.Hostname, changeToMasterKey.Port)
changeMasterFunc = func() error {
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_auto_position=1",
changeToMasterKey.Hostname, changeToMasterKey.Port)
return err
}
changedViaGTID = true
} else {
// Normal binlog file:pos
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_log_file=?, master_log_pos=?",
changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)
changeMasterFunc = func() error {
_, err = ExecInstance(instanceKey, "change master to master_host=?, master_port=?, master_log_file=?, master_log_pos=?",
changeToMasterKey.Hostname, changeToMasterKey.Port, masterBinlogCoordinates.LogFile, masterBinlogCoordinates.LogPos)
return err
}
}
err = changeMasterFunc()
if err != nil && instance.UsingOracleGTID && strings.Contains(err.Error(), Error1201CouldnotInitializeMasterInfoStructure) {
log.Debugf("ChangeMasterTo: got %+v", err)
workaroundBug83713(instanceKey)
err = changeMasterFunc()
}
if err != nil {
return instance, log.Errore(err)
Expand Down Expand Up @@ -717,6 +765,11 @@ func ResetSlave(instanceKey *InstanceKey) (*Instance, error) {
return instance, log.Errore(err)
}
_, err = ExecInstance(instanceKey, `reset slave /*!50603 all */`)
if err != nil && strings.Contains(err.Error(), Error1201CouldnotInitializeMasterInfoStructure) {
log.Debugf("ResetSlave: got %+v", err)
workaroundBug83713(instanceKey)
_, err = ExecInstance(instanceKey, `reset slave /*!50603 all */`)
}
if err != nil {
return instance, log.Errore(err)
}
Expand Down
18 changes: 13 additions & 5 deletions go/logic/topology_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -833,17 +833,25 @@ func checkAndRecoverDeadMaster(analysisEntry inst.ReplicationAnalysis, candidate
if config.Config.ApplyMySQLPromotionAfterMasterFailover || analysisEntry.CommandHint == inst.GracefulMasterTakeoverCommandHint {
// on GracefulMasterTakeoverCommandHint it makes utter sense to RESET SLAVE ALL and read_only=0, and there is no sense in not doing so.
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: will apply MySQL changes to promoted master"))
if _, err := inst.ResetSlaveOperation(&promotedReplica.Key); true {
{
_, err := inst.ResetSlaveOperation(&promotedReplica.Key)
if err != nil {
// Ugly, but this is important. Let's give it another try
_, err = inst.ResetSlaveOperation(&promotedReplica.Key)
}
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: applying RESET SLAVE ALL on promoted master: success=%t", (err == nil)))
if err != nil {
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: NOTE that %+v is promoted even though SHOW SLAVE STATUS may still show it has a master", promotedReplica.Key))
}
}
if _, err := inst.SetReadOnly(&promotedReplica.Key, false); true {
{
_, err := inst.SetReadOnly(&promotedReplica.Key, false)
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: applying read-only=0 on promoted master: success=%t", (err == nil)))
}
// Let's attempt, though we won't necessarily succeed, to set old master as read-only
go func() {
if _, err := inst.SetReadOnly(&analysisEntry.AnalyzedInstanceKey, true); true {
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: applying read-only=1 on demoted master: success=%t", (err == nil)))
}
_, err := inst.SetReadOnly(&analysisEntry.AnalyzedInstanceKey, true)
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: applying read-only=1 on demoted master: success=%t", (err == nil)))
}()
}

Expand Down