Skip to content

Commit

Permalink
Add TiFlash rolling upgrade logic (#3789)
Browse files Browse the repository at this point in the history
* add tiflash rolling upgrade logic

Co-authored-by: DanielZhangQD <36026334+DanielZhangQD@users.noreply.github.com>
  • Loading branch information
handlerww and DanielZhangQD committed Feb 24, 2021
1 parent 120c41e commit ff03e86
Show file tree
Hide file tree
Showing 4 changed files with 501 additions and 6 deletions.
21 changes: 17 additions & 4 deletions pkg/manager/member/tiflash_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ func (m *tiflashMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
}
}

if !templateEqual(newSet, oldSet) {
if !templateEqual(newSet, oldSet) || tc.Status.TiFlash.Phase == v1alpha1.UpgradePhase {
if err := m.upgrader.Upgrade(tc, oldSet, newSet); err != nil {
return err
}
Expand Down Expand Up @@ -420,6 +420,11 @@ func getNewStatefulSet(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (*apps.St
capacity := controller.TiKVCapacity(tc.Spec.TiFlash.Limits)
headlessSvcName := controller.TiFlashPeerMemberName(tcName)

deleteSlotsNumber, err := util.GetDeleteSlotsNumber(stsAnnotations)
if err != nil {
return nil, fmt.Errorf("get delete slots number of statefulset %s/%s failed, err:%v", ns, setName, err)
}

env := []corev1.EnvVar{
{
Name: "NAMESPACE",
Expand Down Expand Up @@ -516,6 +521,16 @@ func getNewStatefulSet(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (*apps.St
podSpec.ServiceAccountName = tc.Spec.ServiceAccount
}

updateStrategy := apps.StatefulSetUpdateStrategy{}
if baseTiFlashSpec.StatefulSetUpdateStrategy() == apps.OnDeleteStatefulSetStrategyType {
updateStrategy.Type = apps.OnDeleteStatefulSetStrategyType
} else {
updateStrategy.Type = apps.RollingUpdateStatefulSetStrategyType
updateStrategy.RollingUpdate = &apps.RollingUpdateStatefulSetStrategy{
Partition: pointer.Int32Ptr(tc.TiFlashStsDesiredReplicas() + deleteSlotsNumber),
}
}

tiflashset := &apps.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: setName,
Expand All @@ -537,9 +552,7 @@ func getNewStatefulSet(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (*apps.St
VolumeClaimTemplates: pvcs,
ServiceName: headlessSvcName,
PodManagementPolicy: apps.ParallelPodManagement,
UpdateStrategy: apps.StatefulSetUpdateStrategy{
Type: baseTiFlashSpec.StatefulSetUpdateStrategy(),
},
UpdateStrategy: updateStrategy,
},
}
return tiflashset, nil
Expand Down
78 changes: 76 additions & 2 deletions pkg/manager/member/tiflash_upgrader.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@
package member

import (
"fmt"

"github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
apps "k8s.io/api/apps/v1"
"k8s.io/klog"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

type tiflashUpgrader struct {
Expand All @@ -30,8 +35,10 @@ func NewTiFlashUpgrader(deps *controller.Dependencies) Upgrader {
}
}

func (tku *tiflashUpgrader) Upgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, newSet *apps.StatefulSet) error {
// Wait for PD, TiKV and TiDB to finish upgrade
func (u *tiflashUpgrader) Upgrade(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSet, newSet *apps.StatefulSet) error {
ns := tc.GetNamespace()
tcName := tc.GetName()

if tc.Status.PD.Phase == v1alpha1.UpgradePhase || tc.Status.TiKV.Phase == v1alpha1.UpgradePhase ||
tc.Status.TiDB.Phase == v1alpha1.UpgradePhase {
_, podSpec, err := GetLastAppliedConfig(oldSet)
Expand All @@ -41,6 +48,73 @@ func (tku *tiflashUpgrader) Upgrade(tc *v1alpha1.TidbCluster, oldSet *apps.State
newSet.Spec.Template.Spec = *podSpec
return nil
}

if !tc.Status.TiFlash.Synced {
return fmt.Errorf("cluster: [%s/%s]'s TiFlash status is not synced, can not upgrade", ns, tcName)
}

tc.Status.TiFlash.Phase = v1alpha1.UpgradePhase
if !templateEqual(newSet, oldSet) {
return nil
}

if tc.Status.TiFlash.StatefulSet.UpdateRevision == tc.Status.TiFlash.StatefulSet.CurrentRevision {
return nil
}

if oldSet.Spec.UpdateStrategy.Type == apps.OnDeleteStatefulSetStrategyType || oldSet.Spec.UpdateStrategy.RollingUpdate == nil {
// The statefulset was modified directly, bypassing tidb-operator, e.g. the TiFlash statefulset's RollingUpdate strategy was changed to OnDelete,
// or RollingUpdate was set to nil, to skip tidb-operator's rolling update logic and occasionally speed up the upgrade in a test environment.
// In this situation we let the native statefulset controller do the upgrade completely, which may be unsafe for upgrading TiFlash.
// Therefore, in production environments, avoid modifying the TiFlash statefulset update strategy directly.
newSet.Spec.UpdateStrategy = oldSet.Spec.UpdateStrategy
klog.Warningf("tidbcluster: [%s/%s] TiFlash statefulset %s UpdateStrategy has been modified manually", ns, tcName, oldSet.GetName())
return nil
}

setUpgradePartition(newSet, *oldSet.Spec.UpdateStrategy.RollingUpdate.Partition)
podOrdinals := helper.GetPodOrdinals(*oldSet.Spec.Replicas, oldSet).List()
for _i := len(podOrdinals) - 1; _i >= 0; _i-- {
i := podOrdinals[_i]
store := getTiFlashStoreByOrdinal(tc.GetName(), tc.Status.TiFlash, i)
if store == nil {
setUpgradePartition(newSet, i)
continue
}
podName := TiFlashPodName(tcName, i)
pod, err := u.deps.PodLister.Pods(ns).Get(podName)
if err != nil {
return fmt.Errorf("TiFlashUpgrader.Upgrade: failed to get pods %s for cluster %s/%s, error: %s", podName, ns, tcName, err)
}
revision, exist := pod.Labels[apps.ControllerRevisionHashLabelKey]
if !exist {
return controller.RequeueErrorf("tidbcluster: [%s/%s]'s TiFlash pod: [%s] has no label: %s", ns, tcName, podName, apps.ControllerRevisionHashLabelKey)
}

if revision == tc.Status.TiFlash.StatefulSet.UpdateRevision {
if !podutil.IsPodReady(pod) {
return controller.RequeueErrorf("tidbcluster: [%s/%s]'s upgraded TiFlash pod: [%s] is not ready", ns, tcName, podName)
}
if store.State != v1alpha1.TiKVStateUp {
return controller.RequeueErrorf("tidbcluster: [%s/%s]'s upgraded TiFlash pod: [%s], store state is not UP", ns, tcName, podName)
}
continue
}

setUpgradePartition(newSet, i)
return nil
}

return nil
}

// getTiFlashStoreByOrdinal searches status.Stores for the entry whose PodName
// equals the TiFlash pod name built from name and ordinal.
//
// It returns a pointer to a copy of the matching entry (the range value, not
// the element stored in status.Stores), or nil when no entry matches.
func getTiFlashStoreByOrdinal(name string, status v1alpha1.TiFlashStatus, ordinal int32) *v1alpha1.TiKVStore {
	wantPod := TiFlashPodName(name, ordinal)
	for _, candidate := range status.Stores {
		if candidate.PodName != wantPod {
			continue
		}
		// Take the address of a dedicated copy rather than of the loop variable.
		matched := candidate
		return &matched
	}
	return nil
}

Expand Down

0 comments on commit ff03e86

Please sign in to comment.