Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#5602
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
  • Loading branch information
ideascf authored and ti-chi-bot committed Apr 7, 2024
1 parent d032996 commit f01ffdf
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 11 deletions.
14 changes: 14 additions & 0 deletions pkg/apis/label/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,16 @@ const (
AnnTiCDCGracefulShutdownBeginTime = "tidb.pingcap.com/ticdc-graceful-shutdown-begin-time"
// AnnStsLastSyncTimestamp is sts annotation key to indicate the last timestamp the operator sync the sts
AnnStsLastSyncTimestamp = "tidb.pingcap.com/sync-timestamp"
<<<<<<< HEAD
=======
// AnnTiflashMountCMInTiflashContainer is tiflash pod annotation key to indicate whether directly mount ConfigMap
// in tiflash container instead of init container for tiflash. With it annotated, the tiflash container will directly
// read config from files mounted by ConfigMap and that enables tiflash support hot-reload config.
AnnTiflashMountCMInTiflashContainer = "tiflash.tidb.pingcap.com/mount-cm-in-tiflash-container"
// AnnoPrefixConfigMapNameBeforeDelete is the last used ConfigMap name before STS deleted. xxx_member_manager should use its
// annotation value as ConfigMap name if the value is not empty when it tries to CREATE or RESTORE sts.
AnnoPrefixConfigMapNameBeforeDelete = "tidb.pingcap.com/configmap-name-before-delete-"
>>>>>>> b02b98ffd (Fix(volume): fix unexpected pod restart during resize storage size (#5602))

// AnnPVCScaleInTime is pvc scaled in time key used in PVC for e2e test only
AnnPVCScaleInTime = "tidb.pingcap.com/scale-in-time"
Expand Down Expand Up @@ -556,3 +566,7 @@ func (l Label) IsManagedByTiDBOperator() bool {
func (l Label) IsTidbClusterPod() bool {
return l[NameLabelKey] == "tidb-cluster"
}

func AnnoKeyOfConfigMapNameForNewSTS(compType string) string {
return AnnoPrefixConfigMapNameBeforeDelete + compType
}
8 changes: 4 additions & 4 deletions pkg/controller/stateful_set_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (
type StatefulSetControlInterface interface {
CreateStatefulSet(runtime.Object, *apps.StatefulSet) error
UpdateStatefulSet(runtime.Object, *apps.StatefulSet) (*apps.StatefulSet, error)
DeleteStatefulSet(runtime.Object, *apps.StatefulSet) error
DeleteStatefulSet(runtime.Object, *apps.StatefulSet, metav1.DeleteOptions) error
}

type realStatefulSetControl struct {
Expand Down Expand Up @@ -112,7 +112,7 @@ func (c *realStatefulSetControl) UpdateStatefulSet(controller runtime.Object, se
}

// DeleteStatefulSet delete a StatefulSet in a TidbCluster.
func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, set *apps.StatefulSet) error {
func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, set *apps.StatefulSet, opts metav1.DeleteOptions) error {
controllerMo, ok := controller.(metav1.Object)
if !ok {
return fmt.Errorf("%T is not a metav1.Object, cannot call setControllerReference", controller)
Expand All @@ -121,7 +121,7 @@ func (c *realStatefulSetControl) DeleteStatefulSet(controller runtime.Object, se
name := controllerMo.GetName()
namespace := controllerMo.GetNamespace()

err := c.kubeCli.AppsV1().StatefulSets(namespace).Delete(context.TODO(), set.Name, metav1.DeleteOptions{})
err := c.kubeCli.AppsV1().StatefulSets(namespace).Delete(context.TODO(), set.Name, opts)
c.recordStatefulSetEvent("delete", kind, name, controller, set, err)
return err
}
Expand Down Expand Up @@ -229,7 +229,7 @@ func (c *FakeStatefulSetControl) UpdateStatefulSet(_ runtime.Object, set *apps.S
}

// DeleteStatefulSet deletes the statefulset of SetIndexer
func (c *FakeStatefulSetControl) DeleteStatefulSet(_ runtime.Object, _ *apps.StatefulSet) error {
func (c *FakeStatefulSetControl) DeleteStatefulSet(_ runtime.Object, _ *apps.StatefulSet, _ metav1.DeleteOptions) error {
return nil
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/controller/stateful_set_control_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
appslisters "k8s.io/client-go/listers/apps/v1"
Expand Down Expand Up @@ -163,7 +164,7 @@ func TestStatefulSetControlDeleteStatefulSet(t *testing.T) {
fakeClient.AddReactor("delete", "statefulsets", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, nil
})
err := control.DeleteStatefulSet(tc, set)
err := control.DeleteStatefulSet(tc, set, metav1.DeleteOptions{})
g.Expect(err).To(Succeed())
events := collectEvents(recorder.Events)
g.Expect(events).To(HaveLen(1))
Expand All @@ -180,7 +181,7 @@ func TestStatefulSetControlDeleteStatefulSetFailed(t *testing.T) {
fakeClient.AddReactor("delete", "statefulsets", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, apierrors.NewInternalError(errors.New("API server down"))
})
err := control.DeleteStatefulSet(tc, set)
err := control.DeleteStatefulSet(tc, set, metav1.DeleteOptions{})
g.Expect(err).To(HaveOccurred())

events := collectEvents(recorder.Events)
Expand Down
6 changes: 6 additions & 0 deletions pkg/manager/member/pd_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"context"
"fmt"
"path"
"regexp"
Expand Down Expand Up @@ -444,6 +445,11 @@ func (m *pdMemberManager) syncPDConfigMap(tc *v1alpha1.TidbCluster, set *apps.St
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.PDMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.PDMemberType, newCm)
if err != nil {
return nil, err
}
}

err = mngerutils.UpdateConfigMapIfNeed(m.deps.ConfigMapLister, tc.BasePDSpec().ConfigUpdateStrategy(), inUseName, newCm)
Expand Down
5 changes: 5 additions & 0 deletions pkg/manager/member/tidb_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,11 @@ func (m *tidbMemberManager) syncTiDBConfigMap(tc *v1alpha1.TidbCluster, set *app
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.TiDBMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.TiDBMemberType, newCm)
if err != nil {
return nil, err
}
}

klog.V(3).Info("get tidb in use config map name: ", inUseName)
Expand Down
8 changes: 7 additions & 1 deletion pkg/manager/member/tiflash_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"context"
"fmt"
"reflect"
"regexp"
Expand Down Expand Up @@ -210,7 +211,7 @@ func (m *tiflashMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
// if TiFlash is scale from 0 and with previous StatefulSet, we delete the previous StatefulSet first
// to avoid some fileds (e.g storage request) reused and cause unexpected behavior (e.g scale down).
if oldSetTmp != nil && *oldSetTmp.Spec.Replicas == 0 && oldSetTmp.Status.UpdatedReplicas == 0 && tc.Spec.TiFlash.Replicas > 0 {
if err := m.deps.StatefulSetControl.DeleteStatefulSet(tc, oldSetTmp); err != nil && !errors.IsNotFound(err) {
if err := m.deps.StatefulSetControl.DeleteStatefulSet(tc, oldSetTmp, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("syncStatefulSet: fail to delete sts %s for cluster %s/%s, error: %s", controller.TiFlashMemberName(tcName), ns, tcName, err)
}
return controller.RequeueErrorf("wait for previous sts %s for cluster %s/%s to be deleted", controller.TiFlashMemberName(tcName), ns, tcName)
Expand Down Expand Up @@ -304,6 +305,11 @@ func (m *tiflashMemberManager) syncConfigMap(tc *v1alpha1.TidbCluster, set *apps
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.TiFlashMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.TiFlashMemberType, newCm)
if err != nil {
return nil, err
}
}

err = mngerutils.UpdateConfigMapIfNeed(m.deps.ConfigMapLister, tc.BaseTiFlashSpec().ConfigUpdateStrategy(), inUseName, newCm)
Expand Down
6 changes: 6 additions & 0 deletions pkg/manager/member/tikv_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"context"
"fmt"
"path"
"reflect"
Expand Down Expand Up @@ -300,6 +301,11 @@ func (m *tikvMemberManager) syncTiKVConfigMap(tc *v1alpha1.TidbCluster, set *app
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.TiKVMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.TiKVMemberType, newCm)
if err != nil {
return nil, err
}
}

err = mngerutils.UpdateConfigMapIfNeed(m.deps.ConfigMapLister, tc.BaseTiKVSpec().ConfigUpdateStrategy(), inUseName, newCm)
Expand Down
6 changes: 6 additions & 0 deletions pkg/manager/member/tiproxy_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package member

import (
"context"
"fmt"
"path"
"path/filepath"
Expand Down Expand Up @@ -175,6 +176,11 @@ func (m *tiproxyMemberManager) syncConfigMap(tc *v1alpha1.TidbCluster, set *apps
inUseName = mngerutils.FindConfigMapVolume(&set.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.TiProxyMemberName(tc.Name))
})
} else {
inUseName, err = mngerutils.FindConfigMapNameFromTCAnno(context.Background(), m.deps.ConfigMapLister, tc, v1alpha1.TiProxyMemberType, newCm)
if err != nil {
return nil, err
}
}

klog.V(4).Info("get tiproxy in use config map name: ", inUseName)
Expand Down
34 changes: 34 additions & 0 deletions pkg/manager/utils/configmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,17 @@
package utils

import (
"context"
"crypto/sha256"
"encoding/json"
"fmt"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/klog/v2"
)

func AddConfigMapDigestSuffix(cm *corev1.ConfigMap) error {
Expand Down Expand Up @@ -49,3 +55,31 @@ func FindConfigMapVolume(podSpec *corev1.PodSpec, pred func(string) bool) string
}
return ""
}

// FindConfigMapNameFromTCAnno is used to find ConfigMap name from tc.annotations which is saved before deleting STS.
// If the data of ConfigMap referenced in anno matches the newCm's data return the name in anno otherwise newCm's name.
//
// In some cases, we may need to delete and recreate STS for updating some immutable fields and are
// expected to keep the name of ConfigMap unchanged to ensure no accidentally restart of pod.
// For example: Updating storage size, iops or throughput of PVC using by TiKV. Now,
// the annotation is set by pvc_resizer(not supported yet), pvc_modifier or pvc_replacer, See pkg/manager/utils/statefulset.go:DeleteStatefulSetWithOrphan.
func FindConfigMapNameFromTCAnno(ctx context.Context, cmLister corelisters.ConfigMapLister, tc *v1alpha1.TidbCluster, componentType v1alpha1.MemberType, newCm *corev1.ConfigMap) (cmName string, _ error) {
logger := klog.FromContext(ctx).WithValues("comp", componentType, "tc", fmt.Sprintf("%s/%s", tc.Namespace, tc.Name))
cmNameInAnno := tc.Annotations[label.AnnoKeyOfConfigMapNameForNewSTS(string(componentType))]
if cmNameInAnno == "" || cmNameInAnno == newCm.Name {
return cmNameInAnno, nil
}

logger.Info("another cm name found in AnnoPrefixConfigMapNameBeforeDelete, try to use it as inuse name.", "cmName", newCm.Name, "nameInAnno", cmNameInAnno)
cmInAnno, err := cmLister.ConfigMaps(tc.Namespace).Get(cmNameInAnno)
if err != nil {
return "", fmt.Errorf("failed to get configmap %s/%s: %w", tc.Namespace, cmNameInAnno, err)
}
// In some cases, ConfigMap may be changed between deleting and creating STS. For example: suspend a cluster and then
// update its config. So just ignore the name in anno if ConfigMap data mismatches.
if !equality.Semantic.DeepEqual(cmInAnno.Data, newCm.Data) {
logger.Info("ConfigMap data changed, ignore the old name in Anno.", "cmName", newCm.Name, "cmNameInAnno", cmNameInAnno)
return newCm.Name, nil
}
return cmNameInAnno, nil
}
63 changes: 63 additions & 0 deletions pkg/manager/utils/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
package utils

import (
"context"
"fmt"
"strings"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
Expand Down Expand Up @@ -163,3 +165,64 @@ func SetUpgradePartition(set *apps.StatefulSet, upgradeOrdinal int32) {
set.Spec.UpdateStrategy.RollingUpdate = &apps.RollingUpdateStatefulSetStrategy{Partition: &upgradeOrdinal}
klog.Infof("set %s/%s partition to %d", set.GetNamespace(), set.GetName(), upgradeOrdinal)
}
<<<<<<< HEAD
=======

// hackEphemeralVolumeMode appends exstings ephemeral volume mode to asts so that no unexpected rolling update will be triggered.
// before https://github.com/pingcap/advanced-statefulset/pull/96, some asts may have volume mode in ephemeral volume,
// but after that, no volume mode in ephemeral volume will be set by defaults,
// so we need to append the volume mode to the new asts spec if it exists in old spec.
func hackEphemeralVolumeMode(oldSts *apps.StatefulSet, newSts *apps.StatefulSet) {
if !features.DefaultFeatureGate.Enabled(features.AdvancedStatefulSet) {
// only need this hack for AdvancedStatefulSet
return
}

volumeModels := make(map[string]corev1.PersistentVolumeMode)
for _, volume := range oldSts.Spec.Template.Spec.Volumes {
if volume.Ephemeral != nil && volume.Ephemeral.VolumeClaimTemplate != nil &&
volume.Ephemeral.VolumeClaimTemplate.Spec.VolumeMode != nil {
volumeModels[volume.Name] = *volume.Ephemeral.VolumeClaimTemplate.Spec.VolumeMode
}
}
if len(volumeModels) == 0 {
return // no need to hack
}

for _, volume := range newSts.Spec.Template.Spec.Volumes {
if volumeMode, ok := volumeModels[volume.Name]; ok && volume.Ephemeral != nil && volume.Ephemeral.VolumeClaimTemplate != nil &&
volume.Ephemeral.VolumeClaimTemplate.Spec.VolumeMode == nil {
volume.Ephemeral.VolumeClaimTemplate.Spec.VolumeMode = &volumeMode
klog.Infof("hack volume mode %s for volume %s in sts %s/%s", volumeMode, volume.Name, newSts.Namespace, newSts.Name)
}
}
}

func DeleteStatefulSetWithOrphan(
ctx context.Context,
setCtl controller.StatefulSetControlInterface,
tcCtl controller.TidbClusterControlInterface,
tc *v1alpha1.TidbCluster, sts *apps.StatefulSet) error {

// Store the name of currently using configmap into TC to make sure xxx_member_manager can use the same ConfigMap name
// when creating(restore) new StatefulSet. See pkg/manager/utils/configmap.go:FindConfigMapNameFromTCAnno.
memberType := v1alpha1.MemberType(label.Label(sts.Labels).ComponentType())
inUseCMName := FindConfigMapVolume(&sts.Spec.Template.Spec, func(name string) bool {
return strings.HasPrefix(name, controller.MemberName(tc.Name, memberType))
})
if tc.Annotations == nil {
tc.Annotations = map[string]string{}
}
tc.Annotations[label.AnnoKeyOfConfigMapNameForNewSTS(string(memberType))] = inUseCMName
logger := klog.FromContext(ctx).WithValues("comp", memberType, "tc", fmt.Sprintf("%s/%s", tc.Namespace, tc.Name))
logger.Info("store inuse configmap name in tc annotation", "name", inUseCMName)
if _, err := tcCtl.Update(tc); err != nil {
return fmt.Errorf("update tc to save name of currently using configmap: %w", err)
}

// Delete sts and remain dependent as orphan
orphan := metav1.DeletePropagationOrphan
err := setCtl.DeleteStatefulSet(tc, sts, metav1.DeleteOptions{PropagationPolicy: &orphan})
return err
}
>>>>>>> b02b98ffd (Fix(volume): fix unexpected pod restart during resize storage size (#5602))
3 changes: 1 addition & 2 deletions pkg/manager/volumes/pvc_modifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ func (p *pvcModifier) tryToRecreateSTS(ctx *componentVolumeContext) error {
return fmt.Errorf("component sts %s/%s is upgrading", ctx.sts.Name, ctx.sts.Namespace)
}

orphan := metav1.DeletePropagationOrphan
if err := p.deps.KubeClientset.AppsV1().StatefulSets(ns).Delete(ctx, name, metav1.DeleteOptions{PropagationPolicy: &orphan}); err != nil {
if err := utils.DeleteStatefulSetWithOrphan(ctx, p.deps.StatefulSetControl, p.deps.TiDBClusterControl, ctx.tc, ctx.sts); err != nil {
return fmt.Errorf("delete sts %s/%s for component %s failed: %s", ns, name, ctx.ComponentID(), err)
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/manager/volumes/pvc_replacer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/manager/utils"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errutil "k8s.io/apimachinery/pkg/util/errors"
Expand Down Expand Up @@ -142,8 +143,7 @@ func (p *pvcReplacer) tryToRecreateSTS(ctx *componentVolumeContext) error {
return nil
}

orphan := metav1.DeletePropagationOrphan
if err := p.deps.KubeClientset.AppsV1().StatefulSets(ns).Delete(ctx, name, metav1.DeleteOptions{PropagationPolicy: &orphan}); err != nil {
if err := utils.DeleteStatefulSetWithOrphan(ctx, p.deps.StatefulSetControl, p.deps.TiDBClusterControl, ctx.tc, ctx.sts); err != nil {
return fmt.Errorf("delete sts %s/%s for component %s failed: %s", ns, name, ctx.ComponentID(), err)
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/manager/volumes/pvc_replacer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,9 @@ func TestPvcReplacerSync(t *testing.T) {
stop := make(chan struct{})
deps.KubeInformerFactory.Start(stop)
deps.KubeInformerFactory.WaitForCacheSync(stop)
// Because we are using statefulSetControl to delete StatefulSet and the fakeStatefulSetControl do nothing when calling DeleteStatefulSet.
// So we should give a REAL statefulSetControl to make sure the StatefulSet is really deleted to pass all tests.
deps.StatefulSetControl = controller.NewRealStatefuSetControl(deps.KubeClientset, deps.StatefulSetLister, deps.Recorder)
defer close(stop)
replacer := NewPVCReplacer(deps)
tc := makeTcAndK8Objects(deps, g, tt.sts, tt.pods)
Expand Down

0 comments on commit f01ffdf

Please sign in to comment.