Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix(volume): fix unexpected pod restart during resize storage size #5602

7 changes: 7 additions & 0 deletions pkg/apis/label/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ const (
// in tiflash container instead of init container for tiflash. With it annotated, the tiflash container will directly
// read config from files mounted by ConfigMap and that enables tiflash support hot-reload config.
AnnTiflashMountCMInTiflashContainer = "tiflash.tidb.pingcap.com/mount-cm-in-tiflash-container"
// AnnoPrefixConfigMapNameBeforeDelete is the last used ConfigMap name before STS deleted. xxx_member_manager should use its
// annotation value as ConfigMap name if the value is not empty when it tries to CREATE or RESTORE sts.
AnnoPrefixConfigMapNameBeforeDelete = "tidb.pingcap.com/configmap-name-before-delete-"

// AnnPVCScaleInTime is pvc scaled in time key used in PVC for e2e test only
AnnPVCScaleInTime = "tidb.pingcap.com/scale-in-time"
Expand Down Expand Up @@ -591,3 +594,7 @@ func (l Label) IsManagedByTiDBOperator() bool {
func (l Label) IsTidbClusterPod() bool {
return l[NameLabelKey] == "tidb-cluster"
}

func AnnoKeyOfConfigMapNameForNewSTS(compType string) string {
return AnnoPrefixConfigMapNameBeforeDelete + compType
}
8 changes: 4 additions & 4 deletions pkg/controller/stateful_set_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (
type StatefulSetControlInterface interface {
CreateStatefulSet(runtime.Object, *apps.StatefulSet) error
UpdateStatefulSet(runtime.Object, *apps.StatefulSet) (*apps.StatefulSet, error)
DeleteStatefulSet(runtime.Object, *apps.StatefulSet) error
DeleteStatefulSet(runtime.Object, *apps.StatefulSet, metav1.DeleteOptions) error
}

type realStatefulSetControl struct {
Expand Down Expand Up @@ -112,7 +112,7 @@ func (c *realStatefulSetControl) UpdateStatefulSet(controller runtime.Object, se
}

// DeleteStatefulSet delete a StatefulSet in a TidbCluster.
func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, set *apps.StatefulSet) error {
func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, set *apps.StatefulSet, opts metav1.DeleteOptions) error {
controllerMo, ok := controller.(metav1.Object)
if !ok {
return fmt.Errorf("%T is not a metav1.Object, cannot call setControllerReference", controller)
Expand All @@ -121,7 +121,7 @@ func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, se
name := controllerMo.GetName()
namespace := controllerMo.GetNamespace()

err := c.kubeCli.AppsV1().StatefulSets(namespace).Delete(context.TODO(), set.Name, metav1.DeleteOptions{})
err := c.kubeCli.AppsV1().StatefulSets(namespace).Delete(context.TODO(), set.Name, opts)
c.recordStatefulSetEvent("delete", kind, name, controller, set, err)
return err
}
Expand Down Expand Up @@ -229,7 +229,7 @@ func (c *FakeStatefulSetControl) UpdateStatefulSet(_ runtime.Object, set *apps.S
}

// DeleteStatefulSet deletes the statefulset of SetIndexer
func (c *FakeStatefulSetControl) DeleteStatefulSet(_ runtime.Object, _ *apps.StatefulSet) error {
func (c *FakeStatefulSetControl) DeleteStatefulSet(_ runtime.Object, _ *apps.StatefulSet, _ metav1.DeleteOptions) error {
return nil
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/controller/stateful_set_control_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
appslisters "k8s.io/client-go/listers/apps/v1"
Expand Down Expand Up @@ -163,7 +164,7 @@ func TestStatefulSetControlDeleteStatefulSet(t *testing.T) {
fakeClient.AddReactor("delete", "statefulsets", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, nil
})
err := control.DeleteStatefulSet(tc, set)
err := control.DeleteStatefulSet(tc, set, metav1.DeleteOptions{})
g.Expect(err).To(Succeed())
events := collectEvents(recorder.Events)
g.Expect(events).To(HaveLen(1))
Expand All @@ -180,7 +181,7 @@ func TestStatefulSetControlDeleteStatefulSetFailed(t *testing.T) {
fakeClient.AddReactor("delete", "statefulsets", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, apierrors.NewInternalError(errors.New("API server down"))
})
err := control.DeleteStatefulSet(tc, set)
err := control.DeleteStatefulSet(tc, set, metav1.DeleteOptions{})
g.Expect(err).To(HaveOccurred())

events := collectEvents(recorder.Events)
Expand Down
6 changes: 6 additions & 0 deletions pkg/manager/member/pd_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"context"
"fmt"
"path"
"regexp"
Expand Down Expand Up @@ -450,6 +451,11 @@ func (m *pdMemberManager) syncPDConfigMap(tc *v1alpha1.TidbCluster, set *apps.St
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.PDMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.PDMemberType, newCm)
if err != nil {
return nil, err
}
}

err = mngerutils.UpdateConfigMapIfNeed(m.deps.ConfigMapLister, tc.BasePDSpec().ConfigUpdateStrategy(), inUseName, newCm)
Expand Down
5 changes: 5 additions & 0 deletions pkg/manager/member/tidb_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,11 @@ func (m *tidbMemberManager) syncTiDBConfigMap(tc *v1alpha1.TidbCluster, set *app
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.TiDBMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.TiDBMemberType, newCm)
if err != nil {
return nil, err
}
}

klog.V(3).Info("get tidb in use config map name: ", inUseName)
Expand Down
8 changes: 7 additions & 1 deletion pkg/manager/member/tiflash_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"context"
"fmt"
"reflect"
"regexp"
Expand Down Expand Up @@ -210,7 +211,7 @@ func (m *tiflashMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
// if TiFlash is scale from 0 and with previous StatefulSet, we delete the previous StatefulSet first
// to avoid some fileds (e.g storage request) reused and cause unexpected behavior (e.g scale down).
if oldSetTmp != nil && *oldSetTmp.Spec.Replicas == 0 && oldSetTmp.Status.UpdatedReplicas == 0 && tc.Spec.TiFlash.Replicas > 0 {
if err := m.deps.StatefulSetControl.DeleteStatefulSet(tc, oldSetTmp); err != nil && !errors.IsNotFound(err) {
if err := m.deps.StatefulSetControl.DeleteStatefulSet(tc, oldSetTmp, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("syncStatefulSet: fail to delete sts %s for cluster %s/%s, error: %s", controller.TiFlashMemberName(tcName), ns, tcName, err)
}
return controller.RequeueErrorf("wait for previous sts %s for cluster %s/%s to be deleted", controller.TiFlashMemberName(tcName), ns, tcName)
Expand Down Expand Up @@ -304,6 +305,11 @@ func (m *tiflashMemberManager) syncConfigMap(tc *v1alpha1.TidbCluster, set *apps
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.TiFlashMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.TiFlashMemberType, newCm)
if err != nil {
return nil, err
}
}

err = mngerutils.UpdateConfigMapIfNeed(m.deps.ConfigMapLister, tc.BaseTiFlashSpec().ConfigUpdateStrategy(), inUseName, newCm)
Expand Down
6 changes: 6 additions & 0 deletions pkg/manager/member/tikv_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"context"
"fmt"
"path"
"reflect"
Expand Down Expand Up @@ -309,6 +310,11 @@ func (m *tikvMemberManager) syncTiKVConfigMap(tc *v1alpha1.TidbCluster, set *app
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.TiKVMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.TiKVMemberType, newCm)
if err != nil {
return nil, err
}
}

err = mngerutils.UpdateConfigMapIfNeed(m.deps.ConfigMapLister, tc.BaseTiKVSpec().ConfigUpdateStrategy(), inUseName, newCm)
Expand Down
6 changes: 6 additions & 0 deletions pkg/manager/member/tiproxy_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"context"
"fmt"
"path"
"path/filepath"
Expand Down Expand Up @@ -194,6 +195,11 @@ func (m *tiproxyMemberManager) syncConfigMap(tc *v1alpha1.TidbCluster, set *apps
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.TiProxyMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.TiProxyMemberType, newCm)
if err != nil {
return nil, err
}
}

klog.V(4).Info("get tiproxy in use config map name: ", inUseName)
Expand Down
34 changes: 34 additions & 0 deletions pkg/manager/utils/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,17 @@
package utils

import (
"context"
"crypto/sha256"
"encoding/json"
"fmt"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/klog/v2"
)

func AddConfigMapDigestSuffix(cm *corev1.ConfigMap) error {
Expand Down Expand Up @@ -49,3 +55,31 @@ func FindConfigMapVolume(podSpec *corev1.PodSpec, pred func(string) bool) string
}
return ""
}

// FindConfigMapNameFromTCAnno is used to find ConfigMap name from tc.annotations which is saved before deleting STS.
// If the data of ConfigMap referenced in anno matches the newCm's data return the name in anno otherwise newCm's name.
//
// In some cases, we may need to delete and recreate STS for updating some immutable fields and are
// expected to keep the name of ConfigMap unchanged to ensure no accidentally restart of pod.
// For example: Updating storage size, iops or throughput of PVC using by TiKV. Now,
// the annotation is set by pvc_resizer(not supported yet), pvc_modifier or pvc_replacer, See pkg/manager/utils/statefulset.go:DeleteStatefulSetWithOrphan.
func FindConfigMapNameFromTCAnno(ctx context.Context, cmLister corelisters.ConfigMapLister, tc *v1alpha1.TidbCluster, componentType v1alpha1.MemberType, newCm *corev1.ConfigMap) (cmName string, _ error) {
logger := klog.FromContext(ctx).WithValues("comp", componentType, "tc", fmt.Sprintf("%s/%s", tc.Namespace, tc.Name))
cmNameInAnno := tc.Annotations[label.AnnoKeyOfConfigMapNameForNewSTS(string(componentType))]
if cmNameInAnno == "" || cmNameInAnno == newCm.Name {
return cmNameInAnno, nil
}

logger.Info("another cm name found in AnnoPrefixConfigMapNameBeforeDelete, try to use it as inuse name.", "cmName", newCm.Name, "nameInAnno", cmNameInAnno)
cmInAnno, err := cmLister.ConfigMaps(tc.Namespace).Get(cmNameInAnno)
if err != nil {
return "", fmt.Errorf("failed to get configmap %s/%s: %w", tc.Namespace, cmNameInAnno, err)
}
// In some cases, ConfigMap may be changed between deleting and creating STS. For example: suspend a cluster and then
// update its config. So just ignore the name in anno if ConfigMap data mismatches.
if !equality.Semantic.DeepEqual(cmInAnno.Data, newCm.Data) {
logger.Info("ConfigMap data changed, ignore the old name in Anno.", "cmName", newCm.Name, "cmNameInAnno", cmNameInAnno)
return newCm.Name, nil
}
return cmNameInAnno, nil
}
30 changes: 30 additions & 0 deletions pkg/manager/utils/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
package utils

import (
"context"
"fmt"
"strings"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
Expand Down Expand Up @@ -203,3 +205,31 @@ func hackEphemeralVolumeMode(oldSts *apps.StatefulSet, newSts *apps.StatefulSet)
}
}
}

func DeleteStatefulSetWithOrphan(
ctx context.Context,
setCtl controller.StatefulSetControlInterface,
tcCtl controller.TidbClusterControlInterface,
tc *v1alpha1.TidbCluster, sts *apps.StatefulSet) error {

// Store the name of currently using configmap into TC to make sure xxx_member_manager can use the same ConfigMap name
// when creating(restore) new StatefulSet. See pkg/manager/utils/configmap.go:FindConfigMapNameFromTCAnno.
memberType := v1alpha1.MemberType(label.Label(sts.Labels).ComponentType())
inUseCMName := FindConfigMapVolume(&sts.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.MemberName(tc.Name, memberType))
})
if tc.Annotations == nil {
tc.Annotations = map[string]string{}
}
tc.Annotations[label.AnnoKeyOfConfigMapNameForNewSTS(string(memberType))] = inUseCMName
logger := klog.FromContext(ctx).WithValues("comp", memberType, "tc", fmt.Sprintf("%s/%s", tc.Namespace, tc.Name))
logger.Info("store inuse configmap name in tc annotation", "name", inUseCMName)
if _, err := tcCtl.Update(tc); err != nil {
return fmt.Errorf("update tc to save name of currently using configmap: %w", err)
}

// Delete sts and remain dependent as orphan
orphan := metav1.DeletePropagationOrphan
err := setCtl.DeleteStatefulSet(tc, sts, metav1.DeleteOptions{PropagationPolicy: &orphan})
return err
}
3 changes: 1 addition & 2 deletions pkg/manager/volumes/pvc_modifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ func (p *pvcModifier) tryToRecreateSTS(ctx *componentVolumeContext) error {
return fmt.Errorf("component sts %s/%s is upgrading", ctx.sts.Name, ctx.sts.Namespace)
}

orphan := metav1.DeletePropagationOrphan
if err := p.deps.KubeClientset.AppsV1().StatefulSets(ns).Delete(ctx, name, metav1.DeleteOptions{PropagationPolicy: &orphan}); err != nil {
if err := utils.DeleteStatefulSetWithOrphan(ctx, p.deps.StatefulSetControl, p.deps.TiDBClusterControl, ctx.tc, ctx.sts); err != nil {
return fmt.Errorf("delete sts %s/%s for component %s failed: %s", ns, name, ctx.ComponentID(), err)
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/manager/volumes/pvc_replacer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/manager/utils"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errutil "k8s.io/apimachinery/pkg/util/errors"
Expand Down Expand Up @@ -150,8 +151,7 @@ func (p *pvcReplacer) tryToRecreateSTS(ctx *componentVolumeContext) error {
return nil
}

orphan := metav1.DeletePropagationOrphan
if err := p.deps.KubeClientset.AppsV1().StatefulSets(ns).Delete(ctx, name, metav1.DeleteOptions{PropagationPolicy: &orphan}); err != nil {
if err := utils.DeleteStatefulSetWithOrphan(ctx, p.deps.StatefulSetControl, p.deps.TiDBClusterControl, ctx.tc, ctx.sts); err != nil {
return fmt.Errorf("delete sts %s/%s for component %s failed: %s", ns, name, ctx.ComponentID(), err)
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/manager/volumes/pvc_replacer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,9 @@ func TestPvcReplacerSync(t *testing.T) {
stop := make(chan struct{})
deps.KubeInformerFactory.Start(stop)
deps.KubeInformerFactory.WaitForCacheSync(stop)
// Because we are using statefulSetControl to delete StatefulSet and the fakeStatefulSetControl do nothing when calling DeleteStatefulSet.
// So we should give a REAL statefulSetControl to make sure the StatefulSet is really deleted to pass all tests.
deps.StatefulSetControl = controller.NewRealStatefuSetControl(deps.KubeClientset, deps.StatefulSetLister, deps.Recorder)
defer close(stop)
replacer := NewPVCReplacer(deps)
tc := makeTcAndK8Objects(deps, g, tt.sts, tt.pods)
Expand Down