Skip to content

Commit

Permalink
cherry pick pingcap#2607 to release-1.1
Browse files Browse the repository at this point in the history
Signed-off-by: sre-bot <sre-bot@pingcap.com>
  • Loading branch information
Yisaer authored and sre-bot committed Jun 8, 2020
1 parent 4bade93 commit 64eb7d3
Showing 1 changed file with 41 additions and 10 deletions.
51 changes: 41 additions & 10 deletions tests/failover.go
Expand Up @@ -61,43 +61,74 @@ func (oa *operatorActions) DeletePDDataThenCheckFailover(info *TidbClusterConfig
}
klog.Infof("delete pod %s/%s data successfully", ns, podName)

oldPD, err := oa.kubeCli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
if err != nil {
klog.Error(err)
return err
}
// First, ensure that PD failover has created a new pod and that the failed member has been marked as deleted.
err = wait.Poll(10*time.Second, failoverTimeout+pdFailoverPeriod, func() (bool, error) {
tc, err := oa.cli.PingcapV1alpha1().TidbClusters(ns).Get(tcName, metav1.GetOptions{})
if err != nil {
klog.Error(err)
return false, nil
}

// ensure oldPD is deleted
newPd, err := oa.kubeCli.CoreV1().Pods(ns).Get(podName, metav1.GetOptions{})
if err != nil {
klog.Error(err)
return false, nil
}
if string(oldPD.UID) == string(newPd.UID) {
klog.Infof("oldPD should be deleted and newPD should be created")
return false, nil
}

// ensure the failure member has been marked as deleted
if len(tc.Status.PD.FailureMembers) == 1 {
klog.Infof("%#v", tc.Status.PD.FailureMembers)
return true, nil
for _, failureMember := range tc.Status.PD.FailureMembers {
if failureMember.MemberDeleted {
return true, nil
}
}
}
return false, nil
})
if err != nil {
return fmt.Errorf("failed to check pd %s/%s failover", ns, podName)
}
klog.Infof("check pd %s/%s failover successfully", ns, podName)
klog.Infof("check pd pod %s/%s failover marked successfully, new pod verified", ns, podName)

// Then we ensure pd failover recovery
err = wait.Poll(5*time.Second, 5*time.Minute, func() (done bool, err error) {
tc, err := oa.cli.PingcapV1alpha1().TidbClusters(ns).Get(tcName, metav1.GetOptions{})
if err != nil {
klog.Error(err.Error())
klog.Error(err)
return false, nil
}
if tc.Status.PD.FailureMembers == nil || len(tc.Status.PD.FailureMembers) < 1 {
return true, nil

if tc.Status.PD.FailureMembers != nil && len(tc.Status.PD.FailureMembers) > 0 {
klog.Error("pd failover should empty failure members in recovery")
return false, nil
}
tc.Status.PD.FailureMembers = nil
tc, err = oa.cli.PingcapV1alpha1().TidbClusters(ns).Update(tc)
pdSpecReplicas := tc.Spec.PD.Replicas
pdsts, err := oa.kubeCli.AppsV1().StatefulSets(ns).Get(fmt.Sprintf("%s-pd", tc.Name), metav1.GetOptions{})
if err != nil {
klog.Error(err.Error())
klog.Error(err)
return false, nil
}
return false, nil
if *pdsts.Spec.Replicas != pdSpecReplicas {
klog.Errorf("pdsts replicas[%d] should equal pdspec replicas[%d]", pdSpecReplicas, *pdsts.Spec.Replicas)
return false, nil
}
return true, nil
})
if err != nil {
return err
return fmt.Errorf("check pd cluster %s/%s recovery failed after failover", ns, tcName)
}
klog.Infof("pd cluster have been recovered")

err = oa.CheckTidbClusterStatus(info)
if err != nil {
Expand Down

0 comments on commit 64eb7d3

Please sign in to comment.