Skip to content

Commit

Permalink
Fix(volume): fix unexpected pod restart during resize storage size
Browse files Browse the repository at this point in the history
fix #5601
  • Loading branch information
ideascf committed Apr 2, 2024
1 parent 05631f8 commit afa4e56
Show file tree
Hide file tree
Showing 10 changed files with 82 additions and 11 deletions.
3 changes: 3 additions & 0 deletions pkg/apis/label/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ const (
// in tiflash container instead of init container for tiflash. With it annotated, the tiflash container will directly
// read config from files mounted by ConfigMap and that enables tiflash support hot-reload config.
AnnTiflashMountCMInTiflashContainer = "tiflash.tidb.pingcap.com/mount-cm-in-tiflash-container"
// AnnoConfigMapNameForNewSTS indicates that the xxx_member_manager should use the annotation value as name of ConfigMap
// if the value is not empty when xxx_member_manager try to CREATE sts.
AnnoConfigMapNameForNewSTS = "tidb.pingcap.com/configmap-name-for-new-sts"

// AnnPVCScaleInTime is pvc scaled in time key used in PVC for e2e test only
AnnPVCScaleInTime = "tidb.pingcap.com/scale-in-time"
Expand Down
9 changes: 9 additions & 0 deletions pkg/apis/pingcap/v1alpha1/component_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ type ComponentAccessor interface {
NodeSelector() map[string]string
Labels() map[string]string
Annotations() map[string]string
SetAnnotation(key, value string)
Tolerations() []corev1.Toleration
PodSecurityContext() *corev1.PodSecurityContext
SchedulerName() string
Expand Down Expand Up @@ -303,6 +304,14 @@ func (a *componentAccessorImpl) Annotations() map[string]string {
return anno
}

func (a *componentAccessorImpl) SetAnnotation(key, value string) {
if a.ComponentSpec.Annotations == nil {
a.ComponentSpec.Annotations = map[string]string{}
}

a.ComponentSpec.Annotations[key] = value
}

func (a *componentAccessorImpl) Tolerations() []corev1.Toleration {
if a.ComponentSpec == nil || len(a.ComponentSpec.Tolerations) == 0 {
return a.tolerations
Expand Down
8 changes: 4 additions & 4 deletions pkg/controller/stateful_set_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (
type StatefulSetControlInterface interface {
CreateStatefulSet(runtime.Object, *apps.StatefulSet) error
UpdateStatefulSet(runtime.Object, *apps.StatefulSet) (*apps.StatefulSet, error)
DeleteStatefulSet(runtime.Object, *apps.StatefulSet) error
DeleteStatefulSet(runtime.Object, *apps.StatefulSet, metav1.DeleteOptions) error
}

type realStatefulSetControl struct {
Expand Down Expand Up @@ -112,7 +112,7 @@ func (c *realStatefulSetControl) UpdateStatefulSet(controller runtime.Object, se
}

// DeleteStatefulSet delete a StatefulSet in a TidbCluster.
func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, set *apps.StatefulSet) error {
func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, set *apps.StatefulSet, opts metav1.DeleteOptions) error {
controllerMo, ok := controller.(metav1.Object)
if !ok {
return fmt.Errorf("%T is not a metav1.Object, cannot call setControllerReference", controller)
Expand All @@ -121,7 +121,7 @@ func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, se
name := controllerMo.GetName()
namespace := controllerMo.GetNamespace()

err := c.kubeCli.AppsV1().StatefulSets(namespace).Delete(context.TODO(), set.Name, metav1.DeleteOptions{})
err := c.kubeCli.AppsV1().StatefulSets(namespace).Delete(context.TODO(), set.Name, opts)
c.recordStatefulSetEvent("delete", kind, name, controller, set, err)
return err
}
Expand Down Expand Up @@ -229,7 +229,7 @@ func (c *FakeStatefulSetControl) UpdateStatefulSet(_ runtime.Object, set *apps.S
}

// DeleteStatefulSet deletes the statefulset of SetIndexer
func (c *FakeStatefulSetControl) DeleteStatefulSet(_ runtime.Object, _ *apps.StatefulSet) error {
func (c *FakeStatefulSetControl) DeleteStatefulSet(_ runtime.Object, _ *apps.StatefulSet, _ metav1.DeleteOptions) error {
return nil
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/controller/stateful_set_control_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
appslisters "k8s.io/client-go/listers/apps/v1"
Expand Down Expand Up @@ -163,7 +164,7 @@ func TestStatefulSetControlDeleteStatefulSet(t *testing.T) {
fakeClient.AddReactor("delete", "statefulsets", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, nil
})
err := control.DeleteStatefulSet(tc, set)
err := control.DeleteStatefulSet(tc, set, metav1.DeleteOptions{})
g.Expect(err).To(Succeed())
events := collectEvents(recorder.Events)
g.Expect(events).To(HaveLen(1))
Expand All @@ -180,7 +181,7 @@ func TestStatefulSetControlDeleteStatefulSetFailed(t *testing.T) {
fakeClient.AddReactor("delete", "statefulsets", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, apierrors.NewInternalError(errors.New("API server down"))
})
err := control.DeleteStatefulSet(tc, set)
err := control.DeleteStatefulSet(tc, set, metav1.DeleteOptions{})
g.Expect(err).To(HaveOccurred())

events := collectEvents(recorder.Events)
Expand Down
5 changes: 4 additions & 1 deletion pkg/manager/member/tiflash_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ func (m *tiflashMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
// if TiFlash is scale from 0 and with previous StatefulSet, we delete the previous StatefulSet first
// to avoid some fileds (e.g storage request) reused and cause unexpected behavior (e.g scale down).
if oldSetTmp != nil && *oldSetTmp.Spec.Replicas == 0 && oldSetTmp.Status.UpdatedReplicas == 0 && tc.Spec.TiFlash.Replicas > 0 {
if err := m.deps.StatefulSetControl.DeleteStatefulSet(tc, oldSetTmp); err != nil && !errors.IsNotFound(err) {
if err := m.deps.StatefulSetControl.DeleteStatefulSet(tc, oldSetTmp, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("syncStatefulSet: fail to delete sts %s for cluster %s/%s, error: %s", controller.TiFlashMemberName(tcName), ns, tcName, err)
}
return controller.RequeueErrorf("wait for previous sts %s for cluster %s/%s to be deleted", controller.TiFlashMemberName(tcName), ns, tcName)
Expand Down Expand Up @@ -251,6 +251,9 @@ func (m *tiflashMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
klog.Infof("TidbCluster: %s/%s, waiting for PD cluster running", ns, tcName)
return nil
}
if _, err := mngerutils.OverwriteConfigMapNameWhenCreateSTS(klog.V(4), m.deps.ConfigMapLister, tc, v1alpha1.TiFlashMemberType, cm); err != nil {
return err
}
err = mngerutils.SetStatefulSetLastAppliedConfigAnnotation(newSet)
if err != nil {
return err
Expand Down
3 changes: 3 additions & 0 deletions pkg/manager/member/tikv_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ func (m *tikvMemberManager) syncStatefulSetForTidbCluster(tc *v1alpha1.TidbClust
return err
}
if setNotExist {
if _, err := mngerutils.OverwriteConfigMapNameWhenCreateSTS(klog.V(4), m.deps.ConfigMapLister, tc, v1alpha1.TiKVMemberType, cm); err != nil {
return err
}
err = mngerutils.SetStatefulSetLastAppliedConfigAnnotation(newSet)
if err != nil {
return err
Expand Down
28 changes: 28 additions & 0 deletions pkg/manager/utils/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ import (
"encoding/json"
"fmt"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/klog/v2"
)

func AddConfigMapDigestSuffix(cm *corev1.ConfigMap) error {
Expand Down Expand Up @@ -49,3 +54,26 @@ func FindConfigMapVolume(podSpec *corev1.PodSpec, pred func(string) bool) string
}
return ""
}

// OverwriteConfigMapNameWhenCreateSTS is used to overwrite ConfigMap name when create a new StatefulSet.
// In some cases, we may need to delete and recreate STS for updating some immutable fields and are
// expected to keep the name of ConfigMap unchanged to ensure no accidentally restart of pod.
// For example: Updating storage size, iops or throughput of PVC using by TiKV. Now,
// the annotation is set by pvc_resizer(not supported yet), pvc_modifier or pvc_replacer, See pkg/manager/utils/statefulset.go:DeleteStatefulSetWithOrphan.
func OverwriteConfigMapNameWhenCreateSTS(logger klog.Verbose, cmLister corelisters.ConfigMapLister, tc *v1alpha1.TidbCluster, componentType v1alpha1.MemberType, cm *corev1.ConfigMap) (overwritten bool, _ error) {
cmNameInAnno := tc.ComponentSpec(componentType).Annotations()[label.AnnoConfigMapNameForNewSTS]
if cmNameInAnno == "" || cm.Name == cmNameInAnno {
return false, nil
}

logger.Infof("another cm name found in AnnoConfigMapNameForNewSTS, use it. comp=%s, name=%s, nameInAnno=%s", componentType, cm.Name, cmNameInAnno)
cmInAnno, err := cmLister.ConfigMaps(tc.Namespace).Get(cmNameInAnno)
if err != nil {
return false, fmt.Errorf("failed to get configmap %s/%s: %w", tc.Namespace, cmNameInAnno, err)
}
if !equality.Semantic.DeepEqual(cmInAnno.Data, cm.Data) {
return false, fmt.Errorf("unexpected ConfigMap data change. comp=%s, name=%s, nameInAnno=%s", componentType, cm.Name, cmNameInAnno)
}
cm.Name = cmNameInAnno
return true, nil
}
25 changes: 25 additions & 0 deletions pkg/manager/utils/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
package utils

import (
"context"
"fmt"
"strings"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
Expand Down Expand Up @@ -203,3 +205,26 @@ func hackEphemeralVolumeMode(oldSts *apps.StatefulSet, newSts *apps.StatefulSet)
}
}
}

func DeleteStatefulSetWithOrphan(
ctx context.Context,
setCtl controller.StatefulSetControlInterface,
tcCtl controller.TidbClusterControlInterface,
tc *v1alpha1.TidbCluster, sts *apps.StatefulSet) error {

// Store the name of currently using configmap into TC to make sure xxx_member_manager can use the same ConfigMap name
// when creating(restore) new StatefulSet.
memberType := v1alpha1.MemberType(label.Label(sts.Labels).ComponentType())
inUseCMName := FindConfigMapVolume(&sts.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.MemberName(name, memberType))
})
tc.ComponentSpec(memberType).SetAnnotation(label.AnnoConfigMapNameForNewSTS, inUseCMName)
if _, err := tcCtl.Update(tc); err != nil {
return fmt.Errorf("update tc to save name of currently using configmap: %w", err)
}

// Delete sts and remain dependent as orphan
orphan := metav1.DeletePropagationOrphan
err := setCtl.DeleteStatefulSet(tc, sts, metav1.DeleteOptions{PropagationPolicy: &orphan})
return err
}
3 changes: 1 addition & 2 deletions pkg/manager/volumes/pvc_modifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ func (p *pvcModifier) tryToRecreateSTS(ctx *componentVolumeContext) error {
return fmt.Errorf("component sts %s/%s is upgrading", ctx.sts.Name, ctx.sts.Namespace)
}

orphan := metav1.DeletePropagationOrphan
if err := p.deps.KubeClientset.AppsV1().StatefulSets(ns).Delete(ctx, name, metav1.DeleteOptions{PropagationPolicy: &orphan}); err != nil {
if err := utils.DeleteStatefulSetWithOrphan(ctx, p.deps.StatefulSetControl, p.deps.TiDBClusterControl, ctx.tc, ctx.sts); err != nil {
return fmt.Errorf("delete sts %s/%s for component %s failed: %s", ns, name, ctx.ComponentID(), err)
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/manager/volumes/pvc_replacer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/manager/utils"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errutil "k8s.io/apimachinery/pkg/util/errors"
Expand Down Expand Up @@ -150,8 +151,7 @@ func (p *pvcReplacer) tryToRecreateSTS(ctx *componentVolumeContext) error {
return nil
}

orphan := metav1.DeletePropagationOrphan
if err := p.deps.KubeClientset.AppsV1().StatefulSets(ns).Delete(ctx, name, metav1.DeleteOptions{PropagationPolicy: &orphan}); err != nil {
if err := utils.DeleteStatefulSetWithOrphan(ctx, p.deps.StatefulSetControl, p.deps.TiDBClusterControl, ctx.tc, ctx.sts); err != nil {
return fmt.Errorf("delete sts %s/%s for component %s failed: %s", ns, name, ctx.ComponentID(), err)
}

Expand Down

0 comments on commit afa4e56

Please sign in to comment.