Skip to content

Commit

Permalink
add etcd as learner mode and promote when fg EtcdLearnerMode is enabled
Browse files Browse the repository at this point in the history
- use etcd backoff to wait; still has many warning messages
Signed-off-by: Paco Xu <paco.xu@daocloud.io>
  • Loading branch information
pacoxu committed Dec 16, 2022
1 parent 861db23 commit b3deecf
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 4 deletions.
2 changes: 1 addition & 1 deletion cmd/kubeadm/app/features/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ const (
PublicKeysECDSA = "PublicKeysECDSA"
// RootlessControlPlane is expected to be in alpha in v1.22
RootlessControlPlane = "RootlessControlPlane"
// EtcdLearnerMode is expected to be in alpha in v1.26
// EtcdLearnerMode is expected to be in alpha in v1.27
EtcdLearnerMode = "EtcdLearnerMode"
)

Expand Down
6 changes: 5 additions & 1 deletion cmd/kubeadm/app/phases/etcd/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,11 @@ func CreateStackedEtcdStaticPodManifestFile(client clientset.Interface, manifest
fmt.Printf("[etcd] Would add etcd member: %s\n", etcdPeerAddress)
} else {
klog.V(1).Infof("[etcd] Adding etcd member: %s", etcdPeerAddress)
cluster, err = etcdClient.AddMember(nodeName, etcdPeerAddress)
if features.Enabled(cfg.FeatureGates, features.EtcdLearnerMode) {
cluster, err = etcdClient.AddMemberAsLeanerAndPromote(nodeName, etcdPeerAddress)
} else {
cluster, err = etcdClient.AddMember(nodeName, etcdPeerAddress)
}
if err != nil {
return err
}
Expand Down
8 changes: 8 additions & 0 deletions cmd/kubeadm/app/phases/upgrade/staticpods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,10 @@ func (c fakeTLSEtcdClient) ListMembers() ([]etcdutil.Member, error) {
return []etcdutil.Member{}, nil
}

func (c fakeTLSEtcdClient) AddMemberAsLeanerAndPromote(name string, peerAddrs string) ([]etcdutil.Member, error) {
return []etcdutil.Member{}, nil
}

func (c fakeTLSEtcdClient) AddMember(name string, peerAddrs string) ([]etcdutil.Member, error) {
return []etcdutil.Member{}, nil
}
Expand Down Expand Up @@ -286,6 +290,10 @@ func (c fakePodManifestEtcdClient) ListMembers() ([]etcdutil.Member, error) {
return []etcdutil.Member{}, nil
}

func (c fakePodManifestEtcdClient) AddMemberAsLeanerAndPromote(name string, peerAddrs string) ([]etcdutil.Member, error) {
return []etcdutil.Member{}, nil
}

func (c fakePodManifestEtcdClient) AddMember(name string, peerAddrs string) ([]etcdutil.Member, error) {
return []etcdutil.Member{}, nil
}
Expand Down
50 changes: 48 additions & 2 deletions cmd/kubeadm/app/util/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ type ClusterInterrogator interface {
Sync() error
ListMembers() ([]Member, error)
AddMember(name string, peerAddrs string) ([]Member, error)
AddMemberAsLeanerAndPromote(name string, peerAddrs string) ([]Member, error)
GetMemberID(peerURL string) (uint64, error)
RemoveMember(id uint64) ([]Member, error)
}
Expand Down Expand Up @@ -341,10 +342,20 @@ func (c *Client) RemoveMember(id uint64) ([]Member, error) {
return ret, nil
}

// AddMember notifies an existing etcd cluster that a new member is joining, and
// AddMember adds a new member into the etcd cluster
func (c *Client) AddMember(name string, peerAddrs string) ([]Member, error) {
return c.addMember(name, peerAddrs, false)
}

// AddMemberAsLeanerAndPromote adds a new learner member into the etcd cluster and promotes it to a voting member
func (c *Client) AddMemberAsLeanerAndPromote(name string, peerAddrs string) ([]Member, error) {
return c.addMember(name, peerAddrs, true)
}

// addMember notifies an existing etcd cluster that a new member is joining, and
// return the updated list of members. If the member has already been added to the
// cluster, this will return the existing list of etcd members.
func (c *Client) AddMember(name string, peerAddrs string) ([]Member, error) {
func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Member, error) {
// Parse the peer address, required to add the client URL later to the list
// of endpoints for this client. Parsing as a first operation to make sure that
// if this fails no member addition is performed on the etcd cluster.
Expand Down Expand Up @@ -376,6 +387,21 @@ func (c *Client) AddMember(name string, peerAddrs string) ([]Member, error) {
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
defer cancel()
var resp *clientv3.MemberAddResponse
if isLearner {
klog.V(1).Infof("[etcd] Adding etcd member as learner: %016x", peerAddrs)
resp, err = cli.MemberAddAsLearner(ctx, []string{peerAddrs})
if err == nil {
learnerID := resp.Member.ID
err = memberPromoteWithRety(ctx, cli, learnerID)
if err != nil {
lastError = err
return false, lastError
}
respMembers = resp.Members
return true, nil
}
}

resp, err = cli.MemberAdd(ctx, []string{peerAddrs})
if err == nil {
respMembers = resp.Members
Expand Down Expand Up @@ -427,6 +453,26 @@ func (c *Client) AddMember(name string, peerAddrs string) ([]Member, error) {
return ret, nil
}

func memberPromoteWithRety(ctx context.Context, cli *clientv3.Client, learnerID uint64) error {
klog.V(1).Infof("[etcd] Promoting a learner as a voting member: %016x", learnerID)
err := wait.PollImmediate(5*time.Second, 5*time.Minute, func() (bool, error) {
// TODO: warning logs from etcd client should be removed.
// The warning logs are printed by etcd client code for several reasons, including
// 1. can not promote yet(no synced)
// 2. context deadline exceeded
// 3. peer URLs already exists
// Once the client provides a way to check if the etcd learner is ready to promote, the retry logic can be revisited.
_, err := cli.MemberPromote(ctx, learnerID)
if err == nil {
klog.V(1).Infof("[etcd] The learner was promoted as a voting member: %016x", learnerID)
return true, nil
}
klog.V(4).Infof("[etcd] Promoting the learner %016x failed: %v", learnerID, err)
return false, nil
})
return err
}

// CheckClusterHealth returns nil for status Up or error for status Down
func (c *Client) CheckClusterHealth() error {
_, err := c.getClusterStatus()
Expand Down

0 comments on commit b3deecf

Please sign in to comment.