From 65292880a0496b55bb87b6fbd13a10679e6c990b Mon Sep 17 00:00:00 2001
From: Artem Chernyshev
Date: Tue, 17 Aug 2021 14:40:26 +0300
Subject: [PATCH] feat: check if cluster has deprecated resource versions

Fixes: https://github.com/talos-systems/talos/issues/4026

Signed-off-by: Artem Chernyshev
---
 cmd/talosctl/cmd/talos/upgrade-k8s.go         |   1 +
 hack/release.toml                             |   8 +
 pkg/cluster/kubernetes/daemonset.go           |   6 +
 pkg/cluster/kubernetes/talos_managed.go       | 208 +++++++++++++++++-
 pkg/cluster/kubernetes/upgrade.go             |   1 +
 .../docs/v0.12/Guides/upgrading-kubernetes.md |  78 ++++---
 website/content/docs/v0.12/Reference/cli.md   |   1 +
 7 files changed, 275 insertions(+), 28 deletions(-)

diff --git a/cmd/talosctl/cmd/talos/upgrade-k8s.go b/cmd/talosctl/cmd/talos/upgrade-k8s.go
index 99bd0447b4..d099f15111 100644
--- a/cmd/talosctl/cmd/talos/upgrade-k8s.go
+++ b/cmd/talosctl/cmd/talos/upgrade-k8s.go
@@ -34,6 +34,7 @@ func init() {
 	upgradeK8sCmd.Flags().StringVar(&upgradeOptions.FromVersion, "from", "", "the Kubernetes control plane version to upgrade from")
 	upgradeK8sCmd.Flags().StringVar(&upgradeOptions.ToVersion, "to", constants.DefaultKubernetesVersion, "the Kubernetes control plane version to upgrade to")
 	upgradeK8sCmd.Flags().StringVar(&upgradeOptions.ControlPlaneEndpoint, "endpoint", "", "the cluster control plane endpoint")
+	upgradeK8sCmd.Flags().BoolVar(&upgradeOptions.DryRun, "dry-run", false, "skip the actual upgrade and show the upgrade plan instead")
 	cli.Should(upgradeK8sCmd.MarkFlagRequired("to"))
 	addCommand(upgradeK8sCmd)
 }
diff --git a/hack/release.toml b/hack/release.toml
index 0a2cde3048..8e0536b596 100644
--- a/hack/release.toml
+++ b/hack/release.toml
@@ -108,6 +108,14 @@ Talos automatically re-assigns IP using the Equinix Metal API when leadership ch
         description = """\
 Unknown keys in the machine config now make the config invalid, so any attempt to apply/edit the configuration with the unknown keys will lead into an error.
+"""
+
+    [notes.upgrade]
+        title = "Kubernetes Upgrade"
+        description = """\
+`talosctl upgrade-k8s` now checks whether the cluster has any resources which are going to be removed or migrated to a new version after the upgrade,
+and shows them as a warning before the upgrade.
+Additionally, the `upgrade-k8s` command now has a `--dry-run` flag that only prints the warnings and the upgrade summary.
 """

 [make_deps]
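Note on the mechanics: the new flag lands in `UpgradeOptions.DryRun` (wired up in `pkg/cluster/kubernetes/upgrade.go` further below), and every updater then short-circuits on it after logging its plan. A minimal runnable sketch of that guard pattern, where `Options` and `upgradeComponent` are illustrative stand-ins rather than the real types:

```go
package main

import "fmt"

// Options is an illustrative stand-in for the patch's UpgradeOptions;
// only the fields the guard needs are mirrored here.
type Options struct {
	DryRun    bool
	ToVersion string
}

// Log mimics UpgradeOptions.Log from the real package.
func (o Options) Log(format string, args ...interface{}) {
	fmt.Printf(format+"\n", args...)
}

// upgradeComponent shows the guard each updater in this patch uses:
// announce the plan first, then skip the mutation entirely in dry-run mode.
func upgradeComponent(options Options, doUpgrade func() error) error {
	options.Log("updating component to version %q", options.ToVersion)

	if options.DryRun {
		options.Log("skipped in dry-run")

		return nil
	}

	return doUpgrade()
}

func main() {
	_ = upgradeComponent(Options{DryRun: true, ToVersion: "1.22.0"}, func() error {
		return nil // the real update would happen here
	})
}
```

Logging before the guard is what makes the dry-run output double as an upgrade plan.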
""" [make_deps] diff --git a/pkg/cluster/kubernetes/daemonset.go b/pkg/cluster/kubernetes/daemonset.go index a4cd62ff59..b874f3f650 100644 --- a/pkg/cluster/kubernetes/daemonset.go +++ b/pkg/cluster/kubernetes/daemonset.go @@ -90,6 +90,12 @@ func updateDaemonset(ctx context.Context, clientset *kubernetes.Clientset, ds st func upgradeDaemonset(ctx context.Context, clientset *kubernetes.Clientset, ds string, options UpgradeOptions) error { options.Log("updating daemonset %q to version %q", ds, options.ToVersion) + if options.DryRun { + options.Log("skipped in dry-run") + + return nil + } + return updateDaemonset(ctx, clientset, ds, func(daemonset *appsv1.DaemonSet) error { if len(daemonset.Spec.Template.Spec.Containers) != 1 { return fmt.Errorf("unexpected number of containers: %d", len(daemonset.Spec.Template.Spec.Containers)) diff --git a/pkg/cluster/kubernetes/talos_managed.go b/pkg/cluster/kubernetes/talos_managed.go index b5329e5ca6..1f3cd046d9 100644 --- a/pkg/cluster/kubernetes/talos_managed.go +++ b/pkg/cluster/kubernetes/talos_managed.go @@ -5,16 +5,26 @@ package kubernetes import ( + "bytes" "context" "errors" "fmt" + "io" + "strings" + "text/tabwriter" "time" "github.com/cosi-project/runtime/pkg/resource" "github.com/cosi-project/runtime/pkg/state" "github.com/talos-systems/go-retry/retry" + v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/discovery" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/rest" "github.com/talos-systems/talos/pkg/cluster" "github.com/talos-systems/talos/pkg/kubernetes" @@ -31,6 +41,19 @@ type UpgradeProvider interface { cluster.K8sProvider } +var deprecations = map[string][]string{ + // https://kubernetes.io/blog/2021/07/14/upcoming-changes-in-kubernetes-1-22/#api-changes + "1.21->1.22": { + "validatingwebhookconfigurations.v1beta1.admissionregistration.k8s.io", + "mutatingwebhookconfigurations.v1beta1.admissionregistration.k8s.io", + "customresourcedefinitions.v1beta1.apiextensions.k8s.io", + "apiservices.v1beta1.apiregistration.k8s.io", + "leases.v1beta1.coordination.k8s.io", + "ingresses.v1beta1.extensions", + "ingresses.v1beta1.networking.k8s.io", + }, +} + // UpgradeTalosManaged the Kubernetes control plane. 
@@ -49,6 +72,10 @@ func UpgradeTalosManaged(ctx context.Context, cluster UpgradeProvider, options U
 		return fmt.Errorf("unsupported upgrade path %q (from %q to %q)", path, options.FromVersion, options.ToVersion)
 	}
 
+	if err := checkDeprecated(ctx, cluster, options); err != nil {
+		return err
+	}
+
 	k8sClient, err := cluster.K8sHelper(ctx)
 	if err != nil {
 		return fmt.Errorf("error building kubernetes client: %w", err)
 	}
@@ -140,6 +167,10 @@ func upgradeNodeConfigPatch(ctx context.Context, cluster UpgradeProvider, option
 		}
 	}
 
+	if options.DryRun {
+		return nil
+	}
+
 	options.Log(" > %q: machine configuration patched", node)
 	options.Log(" > %q: waiting for API server state pod update", node)
@@ -175,7 +206,7 @@ func upgradeNodeConfigPatch(ctx context.Context, cluster UpgradeProvider, option
 
 var errUpdateSkipped = fmt.Errorf("update skipped")
 
-//nolint:gocyclo
+//nolint:gocyclo,cyclop
 func upgradeConfigPatcher(options UpgradeOptions, service string, configResource resource.Resource) func(config *v1alpha1config.Config) error {
 	return func(config *v1alpha1config.Config) error {
 		if config.ClusterConfig == nil {
@@ -185,6 +216,25 @@ func upgradeConfigPatcher(options UpgradeOptions, service string, configResource
 		configData := configResource.(*resource.Any).Value().(map[string]interface{}) //nolint:errcheck,forcetypeassert
 		configImage := configData["image"].(string) //nolint:errcheck,forcetypeassert
 
+		logUpdate := func(oldImage string) {
+			parts := strings.Split(oldImage, ":")
+
+			version := options.FromVersion
+
+			if len(parts) > 1 {
+				version = parts[1]
+			}
+
+			options.Log(" > update %s: %s -> %s", service, version, options.ToVersion)
+
+			if options.DryRun {
+				options.Log(" > skipped in dry-run")
+			}
+		}
+
 		switch service {
 		case kubeAPIServer:
 			if config.ClusterConfig.APIServerConfig == nil {
@@ -197,6 +247,12 @@ func upgradeConfigPatcher(options UpgradeOptions, service string, configResource
 				return errUpdateSkipped
 			}
 
+			logUpdate(config.ClusterConfig.APIServerConfig.ContainerImage)
+
+			if options.DryRun {
+				return errUpdateSkipped
+			}
+
 			config.ClusterConfig.APIServerConfig.ContainerImage = image
 		case kubeControllerManager:
 			if config.ClusterConfig.ControllerManagerConfig == nil {
@@ -209,6 +265,12 @@ func upgradeConfigPatcher(options UpgradeOptions, service string, configResource
 				return errUpdateSkipped
 			}
 
+			logUpdate(config.ClusterConfig.ControllerManagerConfig.ContainerImage)
+
+			if options.DryRun {
+				return errUpdateSkipped
+			}
+
 			config.ClusterConfig.ControllerManagerConfig.ContainerImage = image
 		case kubeScheduler:
 			if config.ClusterConfig.SchedulerConfig == nil {
@@ -221,6 +283,12 @@ func upgradeConfigPatcher(options UpgradeOptions, service string, configResource
 				return errUpdateSkipped
 			}
 
+			logUpdate(config.ClusterConfig.SchedulerConfig.ContainerImage)
+
+			if options.DryRun {
+				return errUpdateSkipped
+			}
+
 			config.ClusterConfig.SchedulerConfig.ContainerImage = image
 		default:
 			return fmt.Errorf("unsupported service %q", service)
@@ -237,7 +305,7 @@ func checkPodStatus(ctx context.Context, cluster UpgradeProvider, service, node,
 		return fmt.Errorf("error building kubernetes client: %w", err)
 	}
 
-	pods, err := k8sClient.CoreV1().Pods(namespace).List(ctx, v1.ListOptions{
+	pods, err := k8sClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
 		LabelSelector: fmt.Sprintf("k8s-app = %s", service),
 	})
 	if err != nil {
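`logUpdate` above recovers the currently-configured version from the container image reference by splitting on `:` and falling back to `--from` when no tag is present. A standalone sketch of that extraction (the naive split would also catch a registry port, e.g. `registry:5000/img`, but the control plane images handled here are plain `repo:tag` references):

```go
package main

import (
	"fmt"
	"strings"
)

// versionFromImage mirrors logUpdate's tag extraction: use the tag of a
// "repository:tag" reference, or the fallback for empty/untagged images.
func versionFromImage(image, fallback string) string {
	parts := strings.Split(image, ":")
	if len(parts) > 1 {
		return parts[1]
	}

	return fallback
}

func main() {
	fmt.Println(versionFromImage("k8s.gcr.io/kube-apiserver:v1.21.3", "1.21.3")) // v1.21.3
	fmt.Println(versionFromImage("", "1.21.3"))                                  // 1.21.3
}
```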
@@ -288,3 +356,137 @@ func checkPodStatus(ctx context.Context, cluster UpgradeProvider, service, node,
 
 	return nil
 }
+
+//nolint:gocyclo,cyclop
+func checkDeprecated(ctx context.Context, cluster UpgradeProvider, options UpgradeOptions) error {
+	options.Log("checking for resource APIs to be deprecated in version %s", options.ToVersion)
+
+	resources, ok := deprecations[options.Path()]
+	if !ok {
+		return nil
+	}
+
+	config, err := cluster.K8sRestConfig(ctx)
+	if err != nil {
+		return err
+	}
+
+	config.WarningHandler = rest.NewWarningWriter(io.Discard, rest.WarningWriterOptions{})
+
+	k8sClient, err := dynamic.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("error building kubernetes client: %w", err)
+	}
+
+	staticClient, err := cluster.K8sHelper(ctx)
+	if err != nil {
+		return fmt.Errorf("error building kubernetes client: %w", err)
+	}
+
+	hasDeprecated := false
+
+	warnings := bytes.NewBuffer([]byte{})
+
+	w := tabwriter.NewWriter(warnings, 0, 0, 3, ' ', 0)
+
+	namespaces, err := staticClient.CoreV1().Namespaces().List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return err
+	}
+
+	dc, err := discovery.NewDiscoveryClientForConfig(config)
+	if err != nil {
+		return err
+	}
+
+	serverResources, err := dc.ServerPreferredNamespacedResources()
+	if err != nil {
+		return err
+	}
+
+	namespacedResources := map[string]struct{}{}
+
+	for _, list := range serverResources {
+		for _, resource := range list.APIResources {
+			namespacedResources[resource.Name] = struct{}{}
+		}
+	}
+
+	for _, resource := range resources {
+		gvr, _ := schema.ParseResourceArg(resource)
+
+		if gvr == nil {
+			return fmt.Errorf("failed to parse group version resource %s", resource)
+		}
+
+		var res *unstructured.UnstructuredList
+
+		count := 0
+
+		probeResources := func(namespaces ...v1.Namespace) error {
+			r := k8sClient.Resource(*gvr)
+
+			// cluster-scoped resources are probed with a single cluster-wide list
+			if len(namespaces) == 0 {
+				res, err = r.List(ctx, metav1.ListOptions{})
+				if err != nil {
+					if apierrors.IsNotFound(err) {
+						return nil
+					}
+
+					return err
+				}
+
+				count += len(res.Items)
+
+				return nil
+			}
+
+			for _, ns := range namespaces {
+				// Namespace() returns a new scoped interface, so its result must be chained
+				res, err = r.Namespace(ns.Name).List(ctx, metav1.ListOptions{})
+				if err != nil {
+					if apierrors.IsNotFound(err) {
+						return nil
+					}
+
+					return err
+				}
+
+				count += len(res.Items)
+			}
+
+			return nil
+		}
+
+		checkNamespaces := []v1.Namespace{}
+
+		if _, ok := namespacedResources[gvr.Resource]; ok {
+			checkNamespaces = namespaces.Items
+		}
+
+		if err = probeResources(checkNamespaces...); err != nil {
+			return err
+		}
+
+		if count > 0 {
+			if !hasDeprecated {
+				fmt.Fprintf(w, "RESOURCE\tCOUNT\n")
+			}
+
+			hasDeprecated = true
+
+			fmt.Fprintf(w, "%s\t%d\n", resource, count)
+		}
+	}
+
+	if hasDeprecated {
+		if err = w.Flush(); err != nil {
+			return err
+		}
+
+		options.Log("WARNING: found resources which are going to be deprecated/migrated in the version %s", options.ToVersion)
+		options.Log(warnings.String())
+	}
+
+	return nil
+}
diff --git a/pkg/cluster/kubernetes/upgrade.go b/pkg/cluster/kubernetes/upgrade.go
index 99806fba4b..79a5be98fb 100644
--- a/pkg/cluster/kubernetes/upgrade.go
+++ b/pkg/cluster/kubernetes/upgrade.go
@@ -28,6 +28,7 @@ type UpgradeOptions struct {
 
 	ControlPlaneEndpoint string
 	LogOutput            io.Writer
+	DryRun               bool
 
 	extraUpdaters []daemonsetUpdater
 	masterNodes   []string
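`checkDeprecated` above probes each deprecated GroupVersionResource through client-go's dynamic client, which can list arbitrary resources as `unstructured` objects without generated typed clients. A minimal standalone sketch of one such probe, assuming a reachable cluster via the default kubeconfig (the Talos code obtains its `rest.Config` from the cluster provider instead):

```go
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Assumption: credentials come from ~/.kube/config here, not from the
	// Talos cluster provider used in the patch.
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}

	client, err := dynamic.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	gvr, _ := schema.ParseResourceArg("ingresses.v1beta1.networking.k8s.io")

	// Namespace() returns a new, namespace-scoped interface; its result
	// must be used, which is why checkDeprecated chains the calls.
	list, err := client.Resource(*gvr).Namespace("default").List(context.Background(), metav1.ListOptions{})
	if err != nil {
		panic(err)
	}

	fmt.Printf("deprecated ingresses.v1beta1 objects in default: %d\n", len(list.Items))
}
```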
diff --git a/website/content/docs/v0.12/Guides/upgrading-kubernetes.md b/website/content/docs/v0.12/Guides/upgrading-kubernetes.md
index ee4a20d264..abd787fc1c 100644
--- a/website/content/docs/v0.12/Guides/upgrading-kubernetes.md
+++ b/website/content/docs/v0.12/Guides/upgrading-kubernetes.md
@@ -14,33 +14,61 @@ To see a live demo of this writeup, see the video below:
 
 ## Automated Kubernetes Upgrade
 
-To upgrade from Kubernetes v1.20.1 to v1.20.4 run:
+To check what is going to be upgraded, you can run `talosctl upgrade-k8s` with the `--dry-run` flag:
 
 ```bash
-$ talosctl --nodes <master node> upgrade-k8s --from 1.20.1 --to 1.20.4
-discovered master nodes ["172.20.0.2" "172.20.0.3" "172.20.0.4"]
-updating "kube-apiserver" to version "1.20.4"
- > updating node "172.20.0.2"
-2021/03/09 19:55:01 retrying error: config version mismatch: got "2", expected "3"
- > updating node "172.20.0.3"
-2021/03/09 19:55:05 retrying error: config version mismatch: got "2", expected "3"
- > updating node "172.20.0.4"
-2021/03/09 19:55:07 retrying error: config version mismatch: got "2", expected "3"
-updating "kube-controller-manager" to version "1.20.4"
- > updating node "172.20.0.2"
-2021/03/09 19:55:27 retrying error: config version mismatch: got "2", expected "3"
- > updating node "172.20.0.3"
-2021/03/09 19:55:47 retrying error: config version mismatch: got "2", expected "3"
- > updating node "172.20.0.4"
-2021/03/09 19:56:07 retrying error: config version mismatch: got "2", expected "3"
-updating "kube-scheduler" to version "1.20.4"
- > updating node "172.20.0.2"
-2021/03/09 19:56:27 retrying error: config version mismatch: got "2", expected "3"
- > updating node "172.20.0.3"
-2021/03/09 19:56:47 retrying error: config version mismatch: got "2", expected "3"
- > updating node "172.20.0.4"
-2021/03/09 19:57:08 retrying error: config version mismatch: got "2", expected "3"
-updating daemonset "kube-proxy" to version "1.20.4"
+$ talosctl --nodes <master node> upgrade-k8s --from 1.21.3 --to 1.22.0 --dry-run
+checking for resource APIs to be deprecated in version 1.22.0
+WARNING: found resources which are going to be deprecated/migrated in the version 1.22.0
+RESOURCE                                                               COUNT
+validatingwebhookconfigurations.v1beta1.admissionregistration.k8s.io   4
+mutatingwebhookconfigurations.v1beta1.admissionregistration.k8s.io     3
+customresourcedefinitions.v1beta1.apiextensions.k8s.io                 25
+apiservices.v1beta1.apiregistration.k8s.io                             54
+leases.v1beta1.coordination.k8s.io                                     4
+
+discovered master nodes ["10.5.0.2"]
+updating "kube-apiserver" to version "1.22.0"
+ > "10.5.0.2": starting update
+ > update kube-apiserver: v1.21.3 -> 1.22.0
+ > skipped in dry-run
+updating "kube-controller-manager" to version "1.22.0"
+ > "10.5.0.2": starting update
+ > update kube-controller-manager: v1.21.3 -> 1.22.0
+ > skipped in dry-run
+updating "kube-scheduler" to version "1.22.0"
+ > "10.5.0.2": starting update
+ > update kube-scheduler: v1.21.3 -> 1.22.0
+ > skipped in dry-run
+updating daemonset "kube-proxy" to version "1.22.0"
+skipped in dry-run
+```
+
+To upgrade Kubernetes from v1.21.3 to v1.22.0 run:
+
+```bash
+$ talosctl --nodes <master node> upgrade-k8s --from 1.21.3 --to 1.22.0
+checking for resource APIs to be deprecated in version 1.22.0
+discovered master nodes ["10.5.0.2"]
+updating "kube-apiserver" to version "1.22.0"
+ > "10.5.0.2": starting update
+ > update kube-apiserver: v1.21.3 -> 1.22.0
+ > "10.5.0.2": machine configuration patched
+ > "10.5.0.2": waiting for API server state pod update
+ < "10.5.0.2": successfully updated
+updating "kube-controller-manager" to version "1.22.0"
+ > "10.5.0.2": starting update
+ > update kube-controller-manager: v1.21.3 -> 1.22.0
+ > "10.5.0.2": machine configuration patched
+ > "10.5.0.2": waiting for API server state pod update
+ < "10.5.0.2": successfully updated
+updating "kube-scheduler" to version "1.22.0"
+ > "10.5.0.2": starting update
+ > update kube-scheduler: v1.21.3 -> 1.22.0
+ > "10.5.0.2": machine configuration patched
+ > "10.5.0.2": waiting for API server state pod update
+ < "10.5.0.2": successfully updated
+updating daemonset "kube-proxy" to version "1.22.0"
 ```
 
 Script runs in two phases:
diff --git a/website/content/docs/v0.12/Reference/cli.md b/website/content/docs/v0.12/Reference/cli.md
index f7a7465b21..4f0300c867 100644
--- a/website/content/docs/v0.12/Reference/cli.md
+++ b/website/content/docs/v0.12/Reference/cli.md
@@ -2008,6 +2008,7 @@ talosctl upgrade-k8s [flags]
 ### Options
 
 ```
+      --dry-run           skip the actual upgrade and show the upgrade plan instead
       --endpoint string   the cluster control plane endpoint
       --from string       the Kubernetes control plane version to upgrade from
   -h, --help              help for upgrade-k8s