Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kubeadm: cleanup upgrade from no-TLS etcd to TLS etcd #71740

Merged
merged 1 commit into from
Dec 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 0 additions & 2 deletions cmd/kubeadm/app/phases/upgrade/compute_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ type fakeEtcdClient struct {
mismatchedVersions bool
}

func (f fakeEtcdClient) HasTLS() bool { return f.TLS }

func (f fakeEtcdClient) ClusterAvailable() (bool, error) { return true, nil }

func (f fakeEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) {
Expand Down
59 changes: 25 additions & 34 deletions cmd/kubeadm/app/phases/upgrade/staticpods.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,28 +165,27 @@ func (spm *KubeStaticPodPathManager) CleanupDirs() error {
return nil
}

func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.InitConfiguration, beforePodHash string, recoverManifests map[string]string, isTLSUpgrade bool) error {
func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.InitConfiguration, beforePodHash string, recoverManifests map[string]string) error {
// Special treatment is required for etcd case, when rollbackOldManifests should roll back etcd
// manifests only for the case when component is Etcd
recoverEtcd := false
waitForComponentRestart := true
if component == constants.Etcd {
recoverEtcd = true
}
if isTLSUpgrade {
// We currently depend on getting the Etcd mirror Pod hash from the KubeAPIServer;
// Upgrading the Etcd protocol takes down the apiserver, so we can't verify component restarts if we restart Etcd independently.
// Skip waiting for Etcd to restart and immediately move on to updating the apiserver.
if component == constants.Etcd {
waitForComponentRestart = false
}
// Normally, if an Etcd upgrade is successful, but the apiserver upgrade fails, Etcd is not rolled back.
// In the case of a TLS upgrade, the old KubeAPIServer config is incompatible with the new Etcd confg, so we rollback Etcd
// if the APIServer upgrade fails.
if component == constants.KubeAPIServer {
recoverEtcd = true
fmt.Printf("[upgrade/staticpods] The %s manifest will be restored if component %q fails to upgrade\n", constants.Etcd, component)
}

// We currently depend on getting the Etcd mirror Pod hash from the KubeAPIServer;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the code under to old``if isTLSUpgrade` should go away.

The case "upgrading the etcd protocol" doesn't exist anymore, so probably also the waitForComponentUpgrade should go away (this last point should be double checked)

// Upgrading the Etcd protocol takes down the apiserver, so we can't verify component restarts if we restart Etcd independently.
// Skip waiting for Etcd to restart and immediately move on to updating the apiserver.
if component == constants.Etcd {
waitForComponentRestart = false
}
// Normally, if an Etcd upgrade is successful, but the apiserver upgrade fails, Etcd is not rolled back.
// In the case of a TLS upgrade, the old KubeAPIServer config is incompatible with the new Etcd confg, so we rollback Etcd
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here for the if and for the recoverEtcd part

// if the APIServer upgrade fails.
if component == constants.KubeAPIServer {
recoverEtcd = true
fmt.Printf("[upgrade/staticpods] The %s manifest will be restored if component %q fails to upgrade\n", constants.Etcd, component)
}

if err := renewCerts(cfg, component); err != nil {
Expand Down Expand Up @@ -252,7 +251,7 @@ func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticP
}

// performEtcdStaticPodUpgrade performs upgrade of etcd, it returns bool which indicates fatal error or not and the actual error.
func performEtcdStaticPodUpgrade(client clientset.Interface, waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.InitConfiguration, recoverManifests map[string]string, isTLSUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) (bool, error) {
func performEtcdStaticPodUpgrade(client clientset.Interface, waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.InitConfiguration, recoverManifests map[string]string, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) (bool, error) {
// Add etcd static pod spec only if external etcd is not configured
if cfg.Etcd.External != nil {
return false, errors.New("external etcd detected, won't try to change any etcd state")
Expand Down Expand Up @@ -313,20 +312,18 @@ func performEtcdStaticPodUpgrade(client clientset.Interface, waiter apiclient.Wa
}

// Waiter configurations for checking etcd status
// If we are upgrading TLS we need to wait for old static pod to be removed.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The case if we are upgrading TLS doesn't exist anymore so pod restart delay IMO should go away

// This is needed because we are not able to currently verify that the static pod
// has been updated through the apiserver across an etcd TLS upgrade.
// This value is arbitrary but seems to be long enough in manual testing.
noDelay := 0 * time.Second
podRestartDelay := noDelay
if isTLSUpgrade {
// If we are upgrading TLS we need to wait for old static pod to be removed.
// This is needed because we are not able to currently verify that the static pod
// has been updated through the apiserver across an etcd TLS upgrade.
// This value is arbitrary but seems to be long enough in manual testing.
podRestartDelay = 30 * time.Second
}
podRestartDelay := 30 * time.Second

retries := 10
retryInterval := 15 * time.Second

// Perform etcd upgrade using common to all control plane components function
if err := upgradeComponent(constants.Etcd, waiter, pathMgr, cfg, beforeEtcdPodHash, recoverManifests, isTLSUpgrade); err != nil {
if err := upgradeComponent(constants.Etcd, waiter, pathMgr, cfg, beforeEtcdPodHash, recoverManifests); err != nil {
fmt.Printf("[upgrade/etcd] Failed to upgrade etcd: %v\n", err)
// Since upgrade component failed, the old etcd manifest has either been restored or was never touched
// Now we need to check the health of etcd cluster if it is up with old manifest
Expand Down Expand Up @@ -404,7 +401,6 @@ func performEtcdStaticPodUpgrade(client clientset.Interface, waiter apiclient.Wa
// StaticPodControlPlane upgrades a static pod-hosted control plane
func StaticPodControlPlane(client clientset.Interface, waiter apiclient.Waiter, pathMgr StaticPodPathManager, cfg *kubeadmapi.InitConfiguration, etcdUpgrade bool, oldEtcdClient, newEtcdClient etcdutil.ClusterInterrogator) error {
recoverManifests := map[string]string{}
var isTLSUpgrade bool
var isExternalEtcd bool

beforePodHashMap, err := waiter.WaitForStaticPodControlPlaneHashes(cfg.NodeRegistration.Name)
Expand Down Expand Up @@ -442,16 +438,11 @@ func StaticPodControlPlane(client clientset.Interface, waiter apiclient.Waiter,

// etcd upgrade is done prior to other control plane components
if !isExternalEtcd && etcdUpgrade {
previousEtcdHasTLS := oldEtcdClient.HasTLS()

// set the TLS upgrade flag for all components
isTLSUpgrade = !previousEtcdHasTLS
if isTLSUpgrade {
fmt.Printf("[upgrade/etcd] Upgrading to TLS for %s\n", constants.Etcd)
}
fmt.Printf("[upgrade/etcd] Upgrading to TLS for %s\n", constants.Etcd)

// Perform etcd upgrade using common to all control plane components function
fatal, err := performEtcdStaticPodUpgrade(client, waiter, pathMgr, cfg, recoverManifests, isTLSUpgrade, oldEtcdClient, newEtcdClient)
fatal, err := performEtcdStaticPodUpgrade(client, waiter, pathMgr, cfg, recoverManifests, oldEtcdClient, newEtcdClient)
if err != nil {
if fatal {
return err
Expand All @@ -468,7 +459,7 @@ func StaticPodControlPlane(client clientset.Interface, waiter apiclient.Waiter,
}

for _, component := range constants.MasterComponents {
if err = upgradeComponent(component, waiter, pathMgr, cfg, beforePodHashMap[component], recoverManifests, isTLSUpgrade); err != nil {
if err = upgradeComponent(component, waiter, pathMgr, cfg, beforePodHashMap[component], recoverManifests); err != nil {
return err
}
}
Expand Down
9 changes: 0 additions & 9 deletions cmd/kubeadm/app/phases/upgrade/staticpods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,6 @@ func (spm *fakeStaticPodPathManager) CleanupDirs() error {

type fakeTLSEtcdClient struct{ TLS bool }

func (c fakeTLSEtcdClient) HasTLS() bool {
return c.TLS
}

func (c fakeTLSEtcdClient) ClusterAvailable() (bool, error) { return true, nil }

func (c fakeTLSEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) {
Expand Down Expand Up @@ -263,11 +259,6 @@ func (c fakeTLSEtcdClient) AddMember(name string, peerAddrs string) ([]etcdutil.

type fakePodManifestEtcdClient struct{ ManifestDir, CertificatesDir string }

func (c fakePodManifestEtcdClient) HasTLS() bool {
hasTLS, _ := etcdutil.PodManifestsHaveTLS(c.ManifestDir)
return hasTLS
}

func (c fakePodManifestEtcdClient) ClusterAvailable() (bool, error) { return true, nil }

func (c fakePodManifestEtcdClient) WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error) {
Expand Down
6 changes: 0 additions & 6 deletions cmd/kubeadm/app/util/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ type ClusterInterrogator interface {
GetClusterStatus() (map[string]*clientv3.StatusResponse, error)
GetClusterVersions() (map[string]string, error)
GetVersion() (string, error)
HasTLS() bool
WaitForClusterAvailable(delay time.Duration, retries int, retryInterval time.Duration) (bool, error)
Sync() error
AddMember(name string, peerAddrs string) ([]Member, error)
Expand All @@ -55,11 +54,6 @@ type Client struct {
TLS *tls.Config
}

// HasTLS returns true if etcd is configured for TLS
func (c Client) HasTLS() bool {
return c.TLS != nil
}

// PodManifestsHaveTLS reads the etcd staticpod manifest from disk and returns false if the TLS flags
// are missing from the command list. If all the flags are present it returns true.
func PodManifestsHaveTLS(ManifestDir string) (bool, error) {
Expand Down