Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix(volume): fix unexpected pod restart during resize storage size #5602

7 changes: 7 additions & 0 deletions pkg/apis/label/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ const (
// in tiflash container instead of init container for tiflash. With it annotated, the tiflash container will directly
// read config from files mounted by ConfigMap and that enables tiflash support hot-reload config.
AnnTiflashMountCMInTiflashContainer = "tiflash.tidb.pingcap.com/mount-cm-in-tiflash-container"
// AnnoConfigMapNameForNewSTSPrefix indicates that the xxx_member_manager should use the annotation value as name of ConfigMap
// if the value is not empty when xxx_member_manager try to CREATE sts.
AnnoConfigMapNameForNewSTSPrefix = "tidb.pingcap.com/configmap-name-for-new-sts/"

// AnnPVCScaleInTime is pvc scaled in time key used in PVC for e2e test only
AnnPVCScaleInTime = "tidb.pingcap.com/scale-in-time"
Expand Down Expand Up @@ -591,3 +594,7 @@ func (l Label) IsManagedByTiDBOperator() bool {
func (l Label) IsTidbClusterPod() bool {
return l[NameLabelKey] == "tidb-cluster"
}

func AnnoKeyOfConfigMapNameForNewSTS(compType string) string {
return AnnoConfigMapNameForNewSTSPrefix + compType
}
9 changes: 9 additions & 0 deletions pkg/apis/pingcap/v1alpha1/component_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ type ComponentAccessor interface {
NodeSelector() map[string]string
Labels() map[string]string
Annotations() map[string]string
SetAnnotation(key, value string)
Tolerations() []corev1.Toleration
PodSecurityContext() *corev1.PodSecurityContext
SchedulerName() string
Expand Down Expand Up @@ -303,6 +304,14 @@ func (a *componentAccessorImpl) Annotations() map[string]string {
return anno
}

func (a *componentAccessorImpl) SetAnnotation(key, value string) {
if a.ComponentSpec.Annotations == nil {
a.ComponentSpec.Annotations = map[string]string{}
}

a.ComponentSpec.Annotations[key] = value
}

func (a *componentAccessorImpl) Tolerations() []corev1.Toleration {
if a.ComponentSpec == nil || len(a.ComponentSpec.Tolerations) == 0 {
return a.tolerations
Expand Down
8 changes: 4 additions & 4 deletions pkg/controller/stateful_set_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (
type StatefulSetControlInterface interface {
CreateStatefulSet(runtime.Object, *apps.StatefulSet) error
UpdateStatefulSet(runtime.Object, *apps.StatefulSet) (*apps.StatefulSet, error)
DeleteStatefulSet(runtime.Object, *apps.StatefulSet) error
DeleteStatefulSet(runtime.Object, *apps.StatefulSet, metav1.DeleteOptions) error
}

type realStatefulSetControl struct {
Expand Down Expand Up @@ -112,7 +112,7 @@ func (c *realStatefulSetControl) UpdateStatefulSet(controller runtime.Object, se
}

// DeleteStatefulSet delete a StatefulSet in a TidbCluster.
func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, set *apps.StatefulSet) error {
func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, set *apps.StatefulSet, opts metav1.DeleteOptions) error {
controllerMo, ok := controller.(metav1.Object)
if !ok {
return fmt.Errorf("%T is not a metav1.Object, cannot call setControllerReference", controller)
Expand All @@ -121,7 +121,7 @@ func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, se
name := controllerMo.GetName()
namespace := controllerMo.GetNamespace()

err := c.kubeCli.AppsV1().StatefulSets(namespace).Delete(context.TODO(), set.Name, metav1.DeleteOptions{})
err := c.kubeCli.AppsV1().StatefulSets(namespace).Delete(context.TODO(), set.Name, opts)
c.recordStatefulSetEvent("delete", kind, name, controller, set, err)
return err
}
Expand Down Expand Up @@ -229,7 +229,7 @@ func (c *FakeStatefulSetControl) UpdateStatefulSet(_ runtime.Object, set *apps.S
}

// DeleteStatefulSet deletes the statefulset of SetIndexer
func (c *FakeStatefulSetControl) DeleteStatefulSet(_ runtime.Object, _ *apps.StatefulSet) error {
func (c *FakeStatefulSetControl) DeleteStatefulSet(_ runtime.Object, _ *apps.StatefulSet, _ metav1.DeleteOptions) error {
return nil
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/controller/stateful_set_control_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
appslisters "k8s.io/client-go/listers/apps/v1"
Expand Down Expand Up @@ -163,7 +164,7 @@ func TestStatefulSetControlDeleteStatefulSet(t *testing.T) {
fakeClient.AddReactor("delete", "statefulsets", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, nil
})
err := control.DeleteStatefulSet(tc, set)
err := control.DeleteStatefulSet(tc, set, metav1.DeleteOptions{})
g.Expect(err).To(Succeed())
events := collectEvents(recorder.Events)
g.Expect(events).To(HaveLen(1))
Expand All @@ -180,7 +181,7 @@ func TestStatefulSetControlDeleteStatefulSetFailed(t *testing.T) {
fakeClient.AddReactor("delete", "statefulsets", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, apierrors.NewInternalError(errors.New("API server down"))
})
err := control.DeleteStatefulSet(tc, set)
err := control.DeleteStatefulSet(tc, set, metav1.DeleteOptions{})
g.Expect(err).To(HaveOccurred())

events := collectEvents(recorder.Events)
Expand Down
3 changes: 3 additions & 0 deletions pkg/manager/member/pd_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ func (m *pdMemberManager) syncPDStatefulSetForTidbCluster(tc *v1alpha1.TidbClust
return err
}
if setNotExist {
if _, err := mngerutils.KeepConfigMapNameUnchangedWhenCreateSTS(klog.V(4), m.deps.ConfigMapLister, tc, v1alpha1.PDMemberType, cm); err != nil {
return err
}
err = mngerutils.SetStatefulSetLastAppliedConfigAnnotation(newPDSet)
if err != nil {
return err
Expand Down
3 changes: 3 additions & 0 deletions pkg/manager/member/tidb_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ func (m *tidbMemberManager) syncTiDBStatefulSetForTidbCluster(tc *v1alpha1.TidbC
}

if setNotExist {
if _, err := mngerutils.KeepConfigMapNameUnchangedWhenCreateSTS(klog.V(4), m.deps.ConfigMapLister, tc, v1alpha1.TiDBMemberType, cm); err != nil {
return err
}
err = mngerutils.SetStatefulSetLastAppliedConfigAnnotation(newTiDBSet)
if err != nil {
return err
Expand Down
5 changes: 4 additions & 1 deletion pkg/manager/member/tiflash_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ func (m *tiflashMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
// if TiFlash is scale from 0 and with previous StatefulSet, we delete the previous StatefulSet first
// to avoid some fileds (e.g storage request) reused and cause unexpected behavior (e.g scale down).
if oldSetTmp != nil && *oldSetTmp.Spec.Replicas == 0 && oldSetTmp.Status.UpdatedReplicas == 0 && tc.Spec.TiFlash.Replicas > 0 {
if err := m.deps.StatefulSetControl.DeleteStatefulSet(tc, oldSetTmp); err != nil && !errors.IsNotFound(err) {
if err := m.deps.StatefulSetControl.DeleteStatefulSet(tc, oldSetTmp, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("syncStatefulSet: fail to delete sts %s for cluster %s/%s, error: %s", controller.TiFlashMemberName(tcName), ns, tcName, err)
}
return controller.RequeueErrorf("wait for previous sts %s for cluster %s/%s to be deleted", controller.TiFlashMemberName(tcName), ns, tcName)
Expand Down Expand Up @@ -251,6 +251,9 @@ func (m *tiflashMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
klog.Infof("TidbCluster: %s/%s, waiting for PD cluster running", ns, tcName)
return nil
}
if _, err := mngerutils.KeepConfigMapNameUnchangedWhenCreateSTS(klog.V(4), m.deps.ConfigMapLister, tc, v1alpha1.TiFlashMemberType, cm); err != nil {
return err
}
err = mngerutils.SetStatefulSetLastAppliedConfigAnnotation(newSet)
if err != nil {
return err
Expand Down
3 changes: 3 additions & 0 deletions pkg/manager/member/tikv_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ func (m *tikvMemberManager) syncStatefulSetForTidbCluster(tc *v1alpha1.TidbClust
return err
}
if setNotExist {
if _, err := mngerutils.KeepConfigMapNameUnchangedWhenCreateSTS(klog.V(4), m.deps.ConfigMapLister, tc, v1alpha1.TiKVMemberType, cm); err != nil {
return err
}
err = mngerutils.SetStatefulSetLastAppliedConfigAnnotation(newSet)
if err != nil {
return err
Expand Down
3 changes: 3 additions & 0 deletions pkg/manager/member/tiproxy_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ func (m *tiproxyMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
}

if stsNotExist {
if _, err := mngerutils.KeepConfigMapNameUnchangedWhenCreateSTS(klog.V(4), m.deps.ConfigMapLister, tc, v1alpha1.PDMemberType, cm); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TiProxyMemberType

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed. ab80380

return err
}
err = mngerutils.SetStatefulSetLastAppliedConfigAnnotation(newSts)
if err != nil {
return err
Expand Down
29 changes: 29 additions & 0 deletions pkg/manager/utils/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@ import (
"encoding/json"
"fmt"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/klog/v2"
)

func AddConfigMapDigestSuffix(cm *corev1.ConfigMap) error {
Expand Down Expand Up @@ -49,3 +54,27 @@ func FindConfigMapVolume(podSpec *corev1.PodSpec, pred func(string) bool) string
}
return ""
}

// KeepConfigMapNameUnchangedWhenCreateSTS is used to overwrite ConfigMap name to keep ConfigMap name remains unchanged
// when create a new StatefulSet.
// In some cases, we may need to delete and recreate STS for updating some immutable fields and are
// expected to keep the name of ConfigMap unchanged to ensure no accidentally restart of pod.
// For example: Updating storage size, iops or throughput of PVC using by TiKV. Now,
// the annotation is set by pvc_resizer(not supported yet), pvc_modifier or pvc_replacer, See pkg/manager/utils/statefulset.go:DeleteStatefulSetWithOrphan.
func KeepConfigMapNameUnchangedWhenCreateSTS(logger klog.Verbose, cmLister corelisters.ConfigMapLister, tc *v1alpha1.TidbCluster, componentType v1alpha1.MemberType, cm *corev1.ConfigMap) (overwritten bool, _ error) {
cmNameInAnno := tc.ComponentSpec(componentType).Annotations()[label.AnnoKeyOfConfigMapNameForNewSTS(string(componentType))]
if cmNameInAnno == "" || cm.Name == cmNameInAnno {
return false, nil
}

logger.Infof("another cm name found in AnnoConfigMapNameForNewSTSPrefix, use it. comp=%s, name=%s, nameInAnno=%s", componentType, cm.Name, cmNameInAnno)
cmInAnno, err := cmLister.ConfigMaps(tc.Namespace).Get(cmNameInAnno)
if err != nil {
return false, fmt.Errorf("failed to get configmap %s/%s: %w", tc.Namespace, cmNameInAnno, err)
}
if !equality.Semantic.DeepEqual(cmInAnno.Data, cm.Data) {
return false, fmt.Errorf("unexpected ConfigMap data change. comp=%s, name=%s, nameInAnno=%s", componentType, cm.Name, cmNameInAnno)
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To ensure no ConfigMap data changed between delete and recreate STS

cm.Name = cmNameInAnno
return true, nil
}
25 changes: 25 additions & 0 deletions pkg/manager/utils/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
package utils

import (
"context"
"fmt"
"strings"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
Expand Down Expand Up @@ -203,3 +205,26 @@ func hackEphemeralVolumeMode(oldSts *apps.StatefulSet, newSts *apps.StatefulSet)
}
}
}

func DeleteStatefulSetWithOrphan(
ctx context.Context,
setCtl controller.StatefulSetControlInterface,
tcCtl controller.TidbClusterControlInterface,
tc *v1alpha1.TidbCluster, sts *apps.StatefulSet) error {

// Store the name of currently using configmap into TC to make sure xxx_member_manager can use the same ConfigMap name
// when creating(restore) new StatefulSet. See pkg/manager/utils/configmap.go:KeepConfigMapNameUnchangedWhenCreateSTS.
memberType := v1alpha1.MemberType(label.Label(sts.Labels).ComponentType())
inUseCMName := FindConfigMapVolume(&sts.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.MemberName(name, memberType))
})
tc.ComponentSpec(memberType).SetAnnotation(label.AnnoKeyOfConfigMapNameForNewSTS(string(memberType)), inUseCMName)
if _, err := tcCtl.Update(tc); err != nil {
return fmt.Errorf("update tc to save name of currently using configmap: %w", err)
}

// Delete sts and remain dependent as orphan
orphan := metav1.DeletePropagationOrphan
err := setCtl.DeleteStatefulSet(tc, sts, metav1.DeleteOptions{PropagationPolicy: &orphan})
return err
}
3 changes: 1 addition & 2 deletions pkg/manager/volumes/pvc_modifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ func (p *pvcModifier) tryToRecreateSTS(ctx *componentVolumeContext) error {
return fmt.Errorf("component sts %s/%s is upgrading", ctx.sts.Name, ctx.sts.Namespace)
}

orphan := metav1.DeletePropagationOrphan
if err := p.deps.KubeClientset.AppsV1().StatefulSets(ns).Delete(ctx, name, metav1.DeleteOptions{PropagationPolicy: &orphan}); err != nil {
if err := utils.DeleteStatefulSetWithOrphan(ctx, p.deps.StatefulSetControl, p.deps.TiDBClusterControl, ctx.tc, ctx.sts); err != nil {
return fmt.Errorf("delete sts %s/%s for component %s failed: %s", ns, name, ctx.ComponentID(), err)
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/manager/volumes/pvc_replacer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/manager/utils"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errutil "k8s.io/apimachinery/pkg/util/errors"
Expand Down Expand Up @@ -150,8 +151,7 @@ func (p *pvcReplacer) tryToRecreateSTS(ctx *componentVolumeContext) error {
return nil
}

orphan := metav1.DeletePropagationOrphan
if err := p.deps.KubeClientset.AppsV1().StatefulSets(ns).Delete(ctx, name, metav1.DeleteOptions{PropagationPolicy: &orphan}); err != nil {
if err := utils.DeleteStatefulSetWithOrphan(ctx, p.deps.StatefulSetControl, p.deps.TiDBClusterControl, ctx.tc, ctx.sts); err != nil {
return fmt.Errorf("delete sts %s/%s for component %s failed: %s", ns, name, ctx.ComponentID(), err)
}

Expand Down